The following analyses examine agricultural commodity loss, at a county level, from 2001-2015, across the 24 county region of the Inland Pacific Northwest (IPNW). In this analysis, we use a time lagged matrix approach to examine a range of combinations of climate and insurance loss for wheat due to drought.
Our analysis can be described in 10 specific steps:
Step 1. Examining wheat/drought acreage ratios compared to average precipitation, potential evapotranspiration, and aridity for the iPNW study area, 2001 to 2015.
Step 2. Construction of design matrices of all monthly climate combinations, per county and per climate variable (precipitation, maximum temperature, and potential evapotranspiration).
Step 3. Examining spatial relationships of climate lagged correlations.
Step 4. Examining Bi-variate relationships between individual climate variables and wheat/drought insurance loss, for all 24 iPNW counties, from 2001 to 2015.
Step 5. Examining conditional relationships of optimized climate/insurance loss relationships using regression based random forest modeling.
Step 6. Results of drought acreage model by county.
Step 7. Results of seasonal loss model for all 24 counties.
Step 8. Results of seasonal loss model by county.
Step 9. Plotting R2/NRMSE results of regression random forest model for each county.
Step 10. Plotting R2 results of regression random forest model for each county.
In step 1, we perform a broad examination of wheat insurance loss due to drought for our 24 county iPNW study area, by calculating the ratio of wheat/drought acreage claims vs. all other wheat related damage cause claims, and comparing that data to individual climate variables. Example: For Whitman County, WA - we calculated the total amount of insurance claim acreage for wheat due to drought (for 2001 to 2015 combined), and divided that by the total amount of wheat insurance acreage for all other damage causes (excessive moisture, hail, frost, freeze, etc). We did this for each of the 24 counties.
We then calculate a summary value for each climate variable (plus aridity - which is PR / PET), for each county, from 2001 to 2015. We then construct three scatterplots for comparison. Each point represents a county. For precipitation and potential evapotranspiration, we use the average annual total.
ipnw_climate_county_comparison <- function(var_commodity, var_damage) {
  # Compare county-level insurance loss for one commodity/damage-cause pair
  # against summarized climate variables for the 24-county iPNW study area
  # (2001-2015), then draw three scatterplots of the percentage of claim
  # acreage lost to `var_damage` vs. annual precipitation, annual PET, and
  # the aridity index (PR/PET).
  #
  # Args:
  #   var_commodity: RMA commodity name, e.g. "WHEAT"
  #   var_damage:    RMA damage cause, e.g. "Drought"
  library(plotrix)
  library(ggplot2)
  library(gridExtra)
  library(RCurl)

  # ---- load climate data ----
  # (archives can also be fetched from the RFclimatePaper GitHub repository)
  unzip("data/climatology/climatology.zip", exdir = "data/climatology/")

  # Read the 15 annual "<State>_<year>_ipnw_summary" files for one state and
  # collapse them to one row per county: drop the month column, sum monthly
  # values within each year, then average the annual sums across 2001-2015.
  read_state_climatology <- function(state_name) {
    yearly <- lapply(2001:2015, function(yr) {
      read.csv(paste0("data/climatology/", state_name, "_", yr, "_ipnw_summary"),
               header = TRUE, strip.white = TRUE, sep = ",")
    })
    clim <- do.call(rbind, yearly)
    # remove the month column (column 17) before aggregating
    clim <- clim[-17]
    annual <- aggregate(. ~ countyfips + year, clim, sum)
    aggregate(. ~ countyfips, annual, mean)
  }

  ID1 <- read_state_climatology("Idaho")
  WA1 <- read_state_climatology("Washington")
  OR1 <- read_state_climatology("Oregon")

  # county FIPS lookup, zero-padded to 5-digit codes for merging
  countiesfips <- read.csv("data/counties/counties_fips.csv",
                           header = TRUE, strip.white = TRUE, sep = ",")
  colnames(countiesfips) <- c("countyfips", "county", "state")
  countiesfips$countyfips <- sprintf("%05d", countiesfips$countyfips)

  OR2 <- merge(countiesfips, OR1, by = "countyfips")
  ID2 <- merge(countiesfips, ID1, by = "countyfips")
  WA2 <- merge(countiesfips, WA1, by = "countyfips")

  # ---- add crop insurance (RMA) data ----
  unzip("data/RMA_originaldata/RMA_originaldata_txt.zip", exdir = "data/RMA_originaldata/")

  # one pipe-delimited, headerless RMA file per year
  read_rma_year <- function(yr) {
    read.csv(paste0("data/RMA_originaldata/", yr, ".txt"),
             sep = "|", header = FALSE, strip.white = TRUE)
  }

  # 1989-2000 records: column 15 is loss (these files have no acres column)
  RMA_1989 <- do.call(rbind, lapply(1989:2000, read_rma_year))
  RMA_1989 <- RMA_1989[, c(1, 3, 5, 7, 12, 13, 14, 15)]
  colnames(RMA_1989) <- c("year", "state", "county", "commodity",
                          "damagecause", "monthcode", "month", "loss")

  # 2001-2015 records: column 15 is acres, column 16 is loss
  RMA <- do.call(rbind, lapply(2001:2015, read_rma_year))
  RMA <- RMA[, c(1, 3, 5, 7, 12, 13, 14, 15, 16)]
  colnames(RMA) <- c("year", "state", "county", "commodity",
                     "damagecause", "monthcode", "month", "acres", "loss")

  # restrict to the three Pacific Northwest states
  RMA_PNW <- subset(RMA, state %in% c("WA", "OR", "ID"))
  # NOTE(review): retained from the original but unused below
  RMA_PNW_1989 <- subset(RMA_1989, state %in% c("WA", "OR", "ID"))

  RMA_PNW <- as.data.frame(RMA_PNW)
  RMA_PNW$county <- as.character(RMA_PNW$county)
  RMA_PNW$damagecause <- as.character(RMA_PNW$damagecause)
  # NOTE(review): xrange is retained from the original but unused below
  xrange <- RMA_PNW
  RMA_PNW$commodity <- trimws(RMA_PNW$commodity)
  RMA_PNW$county <- trimws(RMA_PNW$county)
  RMA_PNW$damagecause <- trimws(RMA_PNW$damagecause)

  # For one state: total commodity claim acreage across all damage causes,
  # plus loss/acres/loss-per-acre for the single requested damage cause,
  # merged with that state's per-county climate summary.
  state_loss_climate <- function(state_abbr, climate_df) {
    st <- subset(RMA_PNW, state == state_abbr)
    # acres for all damage causes, aggregated per county
    all_dmg <- subset(st, commodity == var_commodity)
    loss_all <- aggregate(all_dmg$acres,
                          by = list(all_dmg$county, all_dmg$state), FUN = "sum")
    colnames(loss_all) <- c("county", "state", "all_damagecause_acres")
    # loss and loss-per-acre for just the requested damage cause
    one_dmg <- subset(st, commodity == var_commodity & damagecause == var_damage)
    loss <- aggregate(one_dmg$loss,
                      by = list(one_dmg$county, one_dmg$state), FUN = "sum")
    acres <- aggregate(one_dmg$acres,
                       by = list(one_dmg$county, one_dmg$state), FUN = "sum")
    colnames(loss) <- c("county", "state", "loss")
    colnames(acres) <- c("county", "state", "acres")
    loss$acres <- acres$acres
    loss$lossperacre <- loss$loss / loss$acres
    loss_climate <- merge(loss, climate_df, by = c("county", "state"))
    merge(loss_all, loss_climate, by = c("county", "state"))
  }

  ID_loss_climate_2 <- state_loss_climate("ID", ID2)
  WA_loss_climate_2 <- state_loss_climate("WA", WA2)
  OR_loss_climate_2 <- state_loss_climate("OR", OR2)

  # merge all three states, then restrict to the 24 iPNW counties.
  # Franklin is pulled out by county AND state first, so that only
  # Franklin County WA is re-added after the name-only filter.
  iPNW_loss_climate <- rbind(OR_loss_climate_2, ID_loss_climate_2, WA_loss_climate_2)
  Franklin <- subset(iPNW_loss_climate, county == "Franklin" & state == "WA")
  ipnw_counties <- c("Benewah", "Latah", "Nez Perce", "Lewis", "Idaho",
                     "Wasco", "Sherman", "Gilliam", "Morrow", "Umatilla",
                     "Union", "Wallowa", "Douglas", "Walla Walla", "Benton",
                     "Columbia", "Asotin", "Garfield", "Grant", "Whitman",
                     "Spokane", "Lincoln", "Adams")
  iPNW_loss_climate <- subset(iPNW_loss_climate, county %in% ipnw_counties)
  iPNW_loss_climate <- rbind(iPNW_loss_climate, Franklin)

  # share of the commodity's claim acreage attributed to var_damage
  iPNW_loss_climate$pct_acreage <-
    iPNW_loss_climate$acres / iPNW_loss_climate$all_damagecause_acres

  # tmmx sorted and shifted by -273 (Kelvin to approx. Celsius);
  # NOTE(review): retained from the original but not plotted below
  iPNW_loss_climate_tmmx <- iPNW_loss_climate[order(iPNW_loss_climate$tmmx), ]
  iPNW_loss_climate_tmmx$tmmx <- iPNW_loss_climate_tmmx$tmmx - 273

  # ---- three scatterplots: precipitation, PET, aridity (PR/PET) ----
  par(mfrow = c(1, 3), mar = c(11.8, 6.1, 4, 2.1), family = "serif",
      mgp = c(4, 1, 0), las = 0)
  layout(matrix(c(1, 2, 3), 1, 3, byrow = TRUE))

  # precipitation, annualized (monthly value * 12)
  iPNW_loss_climate_pr <- iPNW_loss_climate[order(-iPNW_loss_climate$pr), ]
  iPNW_loss_climate_pr$pr_year <- iPNW_loss_climate_pr$pr * 12
  pr <- ggplot(iPNW_loss_climate_pr, aes(x = pr_year, y = pct_acreage)) +
    geom_point() +
    geom_smooth(method = "loess") +
    xlab("Annual Precipitation (mm)") +
    ylab("Percentage Drought Wheat Acreage Loss") +
    theme(text = element_text(size = 16, family = "serif"))

  # potential evapotranspiration, annualized
  iPNW_loss_climate_pet <- iPNW_loss_climate[order(iPNW_loss_climate$pet), ]
  iPNW_loss_climate_pet$pet_year <- iPNW_loss_climate_pet$pet * 12
  pet <- ggplot(iPNW_loss_climate_pet, aes(x = pet_year, y = pct_acreage)) +
    geom_point() +
    geom_smooth() +
    xlab("Annual PET (mm)") +
    ylab("Percentage Drought Wheat Acreage Loss") +
    theme(text = element_text(size = 16, family = "serif"))

  # aridity index = annual PR / annual PET
  iPNW_loss_climate_prpet <- iPNW_loss_climate
  iPNW_loss_climate_prpet$pet_year <- iPNW_loss_climate_prpet$pet * 12
  iPNW_loss_climate_prpet$pr_year <- iPNW_loss_climate_prpet$pr * 12
  iPNW_loss_climate_prpet$prpet <-
    iPNW_loss_climate_prpet$pr_year / iPNW_loss_climate_prpet$pet_year
  prpet <- ggplot(iPNW_loss_climate_prpet, aes(x = prpet, y = pct_acreage)) +
    geom_point() +
    geom_smooth() +
    xlab("Aridity Index") +
    ylab("Percentage Drought Wheat Acreage Loss") +
    theme(text = element_text(size = 16, family = "serif"))

  grid.arrange(pr, pet, prpet, nrow = 1)
}
# Step 1: county-level comparison of wheat/drought insurance loss vs. climate
ipnw_climate_county_comparison("WHEAT", "Drought")
For the three examined climate variables (potential evapotranspiration, precipitation, maximum temperature), a design matrix was developed for each of the 24 counties that are part of the defined iPNW study area. For each county, an absolute correlation was calculated for each monthly combination for each climate variable to the total wheat insurance loss due to drought. The result is a design matrix map that identifies the monthly combination with the highest correlation. For example, for Whitman county, WA, for maximum temperature - the monthly combination with highest correlation between max temperature and wheat insurance loss due to drought was May/June/July (denoted as July 2).
# ---- configuration for the design-matrix correlation analysis ----
# Target state/county, commodity, damage cause, climate variable, and the
# transformed loss response used as the correlation predictor.
state_var <- "WA"
county_var <- "Whitman"
commodity_var <- "WHEAT"
damage_var <- "Drought"
climate_var <- "tmmx"
predictor_var <- "cube_root_loss"
library(gplots)
library(plyr)
library(dplyr)
#library(plotly)
#packageVersion('plotly')
#response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
# Build the 144 (month, lag-code) labels that index the 12x12 design matrix.
# Each lowercase month name appears with lag codes 1-12: "jan1" ... "dec12".
monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
numlist <- as.data.frame(rep(1:12, times = 12))
# month names and lag codes side by side, 144 rows in total
monthnumlist <- data.frame(month = monthlist[[1]], monthcode = numlist[[1]])
# one combined label per design-matrix cell, e.g. "jul2"
monthnumlist$combined <- paste0(monthnumlist$month, monthnumlist$monthcode)
climmonth <- monthnumlist$combined
# empty 12x12 matrix placeholder and a 144-slot frame to collect the
# loss/climate correlations computed in the loop below
designmatrix <- matrix(NA, ncol = 12, nrow = 12)
dmvector <- as.data.frame(rep(NA, times = 144))
# loop counter used to index into dmvector
cl <- 0
# ---- unpack the per-combination climate matrices and loss summaries ----
# The commented blocks show how to fetch each archive from the GitHub
# repository when the local data/ copies are not present.
#
# URL <- "https://raw.githubusercontent.com/erichseamon/RFclimatePaper/master/data/climate_matrices/climate_matrices.zip"
# destfile <- "/tmp/climate_matrices.zip"
# download.file(URL, destfile)
# outDir<-"/tmp/climate_matrices"
# unzip(destfile,exdir=outDir)
unzip("data/climate_matrices/climate_matrices.zip", exdir="data/climate_matrices/")
#
# URL <- "https://raw.githubusercontent.com/erichseamon/RFclimatePaper/master/data/climate_correlation_summaries/climate_correlations_summaries.zip"
# destfile <- "/tmp/climate_correlations_summaries.zip"
# download.file(URL, destfile)
# outDir<-"/tmp/climate_correlations_summaries"
# unzip(destfile,exdir=outDir)
unzip("data/climate_correlation_summaries/climate_correlations_summaries.zip", exdir="data/climate_correlation_summaries/")
# ---- fill dmvector with one correlation per (month, lag) combination ----
# For each of the 144 labels (e.g. "jul2"): read that combination's climate
# matrix, join it with the county's loss summary by year, and record the
# correlation between the chosen climate variable and the loss predictor.
for (ppp in climmonth) {
cl = cl +1
#setwd("/tmp/climate_matrices")
# climate matrix for this state/county/commodity/damage/month-lag combination
file1 <- read.csv(paste("data/climate_matrices/", state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv", sep=""))
# keep the chosen climate variable column plus column 2 (year)
climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))
#setwd("/tmp/climate_correlations_summaries")
# loss summary for this county/commodity/damage cause
file2 <- as.data.frame(read.csv(paste("data/climate_correlation_summaries/", state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv", sep="")))
file2 <- subset(file2, state == state_var)
file2 <- subset(file2, county == county_var)
colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var )
# z-score of the climate variable (becomes column 3 of climatevar)
climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
# NOTE(review): likely a no-op — the column created above is already named
# "zscore"; assigning colnames to a single-column extraction does not rename
# the original column in place
colnames(climatevar[3]) <- "zscore"
# join climate and loss by year (plyr::join keeps climatevar row order);
# NOTE(review): this compares against the literal string "NA" — rows with a
# true NA damagecause are also dropped because NA != "NA" evaluates to NA
kor <- join(climatevar, file2, by = "year")
kor2 <- subset(kor, damagecause != "NA")
colnames(kor2)[3] <- "zscore"
# kor2[9] <- as.numeric(kor2$zscore)
# correlation between raw climate values (col 1) and the predictor (col 9)
kor3 <- cor(kor2[1], kor2[9])
#insert kor3 into designmatrix iteratively
dmvector[cl,] <- kor3
}
# Reshape the 144 correlations into the 12x12 month/lag design matrix and
# blank out the cells that do not form valid lag windows.
dmvector <- as.data.frame(dmvector)
colnames(dmvector) <- "correlations"
# fill 12x12 by row, then flip vertically so the first month is at the bottom
dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
dmvector2 <- dmvector2[nrow(dmvector2):1, ]
# keep the 9 rows plotted later (labelled Jan-Sep in the heatmap)
dmvector3 <- dmvector2[4:12, ]
# mask invalid combinations: in row r the invalid cells are columns
# (13 - r) through 12 (replaces nine copy-pasted assignments)
for (r in seq_len(9)) {
  dmvector3[r, (13 - r):12] <- NA
}
# flatten back to a 108-element vector, bottom row (row 9) first
dmvector <- unlist(lapply(9:1, function(r) dmvector3[r, ]))
dmvector <- data.frame(dmvector)
# Pick the design-matrix cell with the strongest signal for this climate
# variable, searching only the first 108 entries (the valid 9x12 region).
# Moisture-related variables use the minimum (most negative) correlation
# with loss; temperature/evaporative-demand variables use the maximum.
# (Flattens the original nine-level nested if/else chain; behavior is
# identical, including that an unrecognized climate_var leaves dmv unset.)
min_cor_vars <- c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")
max_cor_vars <- c("tmmn", "pet", "tmmx")
if (climate_var %in% min_cor_vars) {
  dmv <- which.min(dmvector[1:108, 1])
} else if (climate_var %in% max_cor_vars) {
  dmv <- which.max(dmvector[1:108, 1])
}
#dmv <- which.max(abs( dmvector[1:108,1]) )
# Wrap the winning cell index in a one-row frame and look up its label.
dmv <- data.frame(row = dmv)
# attach an explicit row index 1..144 to the month/lag label table
monthnumlist2 <- cbind(data.frame(row = 1:144), monthnumlist)
# the single row describing the winning (month, lag) combination
monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
require(utils)
# grid coordinates of the winning cell within the 12x9 plotted region
cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row, ]
# row labels for the 9-row heatmap (Jan..Sep)
monthzzz <- month.abb[1:9]
my_palette <- colorRampPalette(c("red", "yellow", "green"))(n = 108)
lblcol <- 9:1
# rounded copy of the correlation matrix, used as heatmap cell labels
dmvector3a <- round(dmvector3, digits = 2)
# NOTE(review): assigned into the global environment via <<-; the consumer
# is not visible in this file
newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row, ])
dmvector3a[9,4:12] <- NA
dmvector3a[8,5:12] <- NA
dmvector3a[7,6:12] <- NA
dmvector3a[6,7:12] <- NA
dmvector3a[5,8:12] <- NA
dmvector3a[4,9:12] <- NA
dmvector3a[3,10:12] <- NA
dmvector3a[2,11:12] <- NA
dmvector3a[1,12:12] <- NA
my_palette <- colorRampPalette(c("lightyellow", "yellow", "orange", "darkorange", "red", "darkred"))(n = 32)
lwid = c(1.5,4)
lhei = c(1.5,4,1)
nba_heatmap_tmmx<- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace= "none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "white", notecex = 2, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), lmat = rbind(c(0,3),c(2,1),c(0,4)), lwid = lwid, lhei = lhei, cellnote = dmvector3a)
# png(file = "/mnt/ceph/erichs/git/RFclimatePaper/figures/heatmap_tmmx.png", width=8,height=7,units="in",res=1200)
#
# nba_heatmap_tmmx<- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace= "none", labRow = rev(monthzzz), tracecol = "black", key = FALSE, notecol = "white", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), lmat = rbind(c(0,3),c(2,1),c(0,4)), lwid = lwid, lhei = lhei, cellnote = dmvector3a)
#
# dev.off
# nba_heatmap_tmmx <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, symbreaks = FALSE, scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""))
# nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=topo.colors(16), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))
state_var <- "WA"
county_var <- "Whitman"
commodity_var <- "WHEAT"
damage_var <- "Drought"
climate_var <- "pet"
predictor_var <- "cube_root_loss"
library(gplots)
library(plyr)
library(dplyr)
#library(plotly)
#packageVersion('plotly')
#response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
#monthlist is jan-dec, each repeated 12 times
monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
#numlist is 12 months, repeated 12 times
numlist <- as.data.frame(rep((1:12), times = 12))
#monthnumlist puts month names and 1-12 together, which results in a vector of 144 that is the matrix of 12 by 12
monthnumlist <- as.data.frame(cbind(monthlist, numlist))
#renaming columns
colnames(monthnumlist) <- c("month", "monthcode")
#put the monthlist all together in one vector
monthnumlist$combined <- paste(monthnumlist$month, monthnumlist$monthcode, sep="")
#rename the vector
climmonth <- monthnumlist$combined
designmatrix <- matrix(NA, ncol=12, nrow=12)
#create an empty 144 vector to fill up with correlations between loss and climate
dmvector <- as.data.frame(rep(NA, times=144))
cl = 0
for (ppp in climmonth) {
cl = cl +1
#setwd("data/climate_matrices")
file1 <- read.csv(paste("data/climate_matrices/", state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv", sep=""))
climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))
#setwd("data/climate_correlations_summaries")
file2 <- as.data.frame(read.csv(paste("data/climate_correlation_summaries/", state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv", sep="")))
file2 <- subset(file2, state == state_var)
file2 <- subset(file2, county == county_var)
colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var )
climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
colnames(climatevar[3]) <- "zscore"
kor <- join(climatevar, file2, by = "year")
kor2 <- subset(kor, damagecause != "NA")
colnames(kor2)[3] <- "zscore"
# kor2[9] <- as.numeric(kor2$zscore)
kor3 <- cor(kor2[1], kor2[9])
#insert kor3 into designmatrix iteratively
dmvector[cl,] <- kor3
}
# Reshape the 144 lag/month correlations into a 12 x 12 matrix (filled by
# row), flip it vertically, and keep the 9 usable rows (rows 4:12).
dmvector <- as.data.frame(dmvector)
colnames(dmvector) <- "correlations"
dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
dmvector2 <- dmvector2[nrow(dmvector2):1, ]
dmvector3 <- dmvector2[4:12, ]
# Blank out the upper-right triangle of impossible lag/month combinations:
# row r keeps columns 1:(12 - r), so columns (13 - r):12 become NA.
for (r in 1:9) {
  dmvector3[r, (13 - r):12] <- NA
}
# Flatten rows 9..1 back into a single 108-value vector (same ordering the
# original nine-term c() produced) for the optimum-cell lookup below.
dmvector <- data.frame(dmvector = as.vector(t(dmvector3[9:1, ])))
# Pick the optimum matrix cell. The original nested chain takes the MINIMUM
# correlation for pr/rmin/rmax/fm100/fm1000/pdsi (variables presumably
# negatively associated with drought loss) and the MAXIMUM for
# tmmn/pet/tmmx. Directions preserved exactly; an unrecognized climate_var
# leaves dmv unset, as before.
if (climate_var %in% c('pr', 'rmin', 'rmax', 'fm100', 'fm1000', 'pdsi')) {
  dmv <- which.min(dmvector[1:108, 1])
} else if (climate_var %in% c('tmmn', 'pet', 'tmmx')) {
  dmv <- which.max(dmvector[1:108, 1])
}
# Map the winning vector position back to its month/lag label and render the
# lagged-correlation heatmap for pet.
dmv <- as.data.frame(dmv)
colnames(dmv)[1] <- "row"
# Attach a 1:144 row index to the month/lag lookup, then pull the label
# (e.g. "jul2") for the optimum cell.
monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
colnames(monthnumlist2)[1] <- "row"
monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
# Grid coordinates of the optimum cell (consumed by the commented-out
# overlay code elsewhere in the file; kept for parity).
cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row, ]
monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
lblcol <- c(9:1)
# Rounded copy of the correlation matrix, used as in-cell annotations.
dmvector3a <- round(dmvector3, digits = 2)
newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row, ])
# Re-apply the impossible-combination mask. dmvector3 is already masked and
# round() preserves NA, so this is a defensive no-op kept for parity with
# the original (which unrolled it over nine lines).
for (r in 1:9) {
  dmvector3a[r, (13 - r):12] <- NA
}
my_palette <- colorRampPalette(c("lightyellow", "yellow", "orange", "darkorange", "red", "darkred"))(n = 32)
lwid = c(1.5,4)
lhei = c(1.5,4,1)
nba_heatmap_pet <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace= "none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "white", notecex = 2, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), lmat = rbind(c(0,3),c(2,1),c(0,4)), lwid = lwid, lhei = lhei, cellnote = dmvector3a)
#
# png(file = "/mnt/ceph/erichs/git/RFclimatePaper/figures/heatmap_pet.png", width=8,height=7,units="in",res=1200)
#
# nba_heatmap_pet<- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace= "none", labRow = rev(monthzzz), tracecol = "black", key = FALSE, notecol = "white", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), lmat = rbind(c(0,3),c(2,1),c(0,4)), lwid = lwid, lhei = lhei, cellnote = dmvector3a)
#
# dev.off
# nba_heatmap_pet <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, symbreaks = FALSE, scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""))
# nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=colorRampPalette(c("lightyellow", "yellow", "orange", "darkorange", "red")), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))
state_var <- "WA"
county_var <- "Whitman"
commodity_var <- "WHEAT"
damage_var <- "Drought"
climate_var <- "pr"
predictor_var <- "cube_root_loss"
library(gplots)
library(plyr)
library(dplyr)
#library(plotly)
#packageVersion('plotly')
#response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
#monthlist is jan-dec, each repeated 12 times
monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
#numlist is 12 months, repeated 12 times
numlist <- as.data.frame(rep((1:12), times = 12))
#monthnumlist puts month names and 1-12 together, which results in a vector of 144 that is the matrix of 12 by 12
monthnumlist <- as.data.frame(cbind(monthlist, numlist))
#renaming columns
colnames(monthnumlist) <- c("month", "monthcode")
#put the monthlist all together in one vector
monthnumlist$combined <- paste(monthnumlist$month, monthnumlist$monthcode, sep="")
#rename the vector
climmonth <- monthnumlist$combined
designmatrix <- matrix(NA, ncol=12, nrow=12)
#create an empty 144 vector to fill up with correlations between loss and climate
dmvector <- as.data.frame(rep(NA, times=144))
cl = 0
for (ppp in climmonth) {
cl = cl +1
#setwd("data/climate_matrices")
file1 <- read.csv(paste("data/climate_matrices/", state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv", sep=""))
climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))
#setwd("data/climate_correlations_summaries")
file2 <- as.data.frame(read.csv(paste("data/climate_correlation_summaries/", state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv", sep="")))
file2 <- subset(file2, state == state_var)
file2 <- subset(file2, county == county_var)
colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var )
climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
colnames(climatevar[3]) <- "zscore"
kor <- join(climatevar, file2, by = "year")
kor2 <- subset(kor, damagecause != "NA")
colnames(kor2)[3] <- "zscore"
# kor2[9] <- as.numeric(kor2$zscore)
kor3 <- cor(kor2[1], kor2[9])
#insert kor3 into designmatrix iteratively
dmvector[cl,] <- kor3
}
# Reshape the 144 lag/month correlations into a 12 x 12 matrix (filled by
# row), flip it vertically, and keep the 9 usable rows (rows 4:12).
dmvector <- as.data.frame(dmvector)
colnames(dmvector) <- "correlations"
dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
dmvector2 <- dmvector2[nrow(dmvector2):1, ]
dmvector3 <- dmvector2[4:12, ]
# Blank out the upper-right triangle of impossible lag/month combinations:
# row r keeps columns 1:(12 - r), so columns (13 - r):12 become NA.
for (r in 1:9) {
  dmvector3[r, (13 - r):12] <- NA
}
# Flatten rows 9..1 back into a single 108-value vector (same ordering the
# original nine-term c() produced) for the optimum-cell lookup below.
dmvector <- data.frame(dmvector = as.vector(t(dmvector3[9:1, ])))
# Pick the optimum matrix cell. The original nested chain takes the MINIMUM
# correlation for pr/rmin/rmax/fm100/fm1000/pdsi (variables presumably
# negatively associated with drought loss) and the MAXIMUM for
# tmmn/pet/tmmx. Directions preserved exactly; an unrecognized climate_var
# leaves dmv unset, as before.
if (climate_var %in% c('pr', 'rmin', 'rmax', 'fm100', 'fm1000', 'pdsi')) {
  dmv <- which.min(dmvector[1:108, 1])
} else if (climate_var %in% c('tmmn', 'pet', 'tmmx')) {
  dmv <- which.max(dmvector[1:108, 1])
}
# Map the winning vector position back to its month/lag label and render the
# lagged-correlation heatmap for pr.
dmv <- as.data.frame(dmv)
colnames(dmv)[1] <- "row"
# Attach a 1:144 row index to the month/lag lookup, then pull the label
# (e.g. "jul2") for the optimum cell.
monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
colnames(monthnumlist2)[1] <- "row"
monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
# Grid coordinates of the optimum cell (consumed by the commented-out
# overlay code elsewhere in the file; kept for parity).
cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row, ]
monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
lblcol <- c(9:1)
# Rounded copy of the correlation matrix, used as in-cell annotations.
dmvector3a <- round(dmvector3, digits = 2)
newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row, ])
# Re-apply the impossible-combination mask. dmvector3 is already masked and
# round() preserves NA, so this is a defensive no-op kept for parity with
# the original (which unrolled it over nine lines).
for (r in 1:9) {
  dmvector3a[r, (13 - r):12] <- NA
}
# Reversed ramp relative to the temperature sections: for precipitation the
# strongest (most negative) correlations render darkest.
my_palette <- colorRampPalette(c("darkred", "red", "darkorange", "orange", "yellow", "lightyellow"))(n = 16)
lwid = c(1.5,4)
lhei = c(1.5,4,1)
nba_heatmap_pr <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace= "none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "white", notecex = 2, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), lmat = rbind(c(0,3),c(2,1),c(0,4)), lwid = lwid, lhei = lhei, cellnote = dmvector3a)
#
#
# png(file = "/mnt/ceph/erichs/git/RFclimatePaper/figures/heatmap_pr.png", width=8,height=7,units="in",res=1200)
#
# nba_heatmap_pr<- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace= "none", labRow = rev(monthzzz), tracecol = "black", key = FALSE, notecol = "white", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), lmat = rbind(c(0,3),c(2,1),c(0,4)), lwid = lwid, lhei = lhei, cellnote = dmvector3a)
#
# dev.off
# nba_heatmap_pr <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""))
# nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=topo.colors(16), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))
state_var <- "WA"
county_var <- "Whitman"
commodity_var <- "WHEAT"
damage_var <- "Drought"
climate_var <- "pdsi"
predictor_var <- "cube_root_loss"
library(gplots)
library(plyr)
library(dplyr)
#library(plotly)
#packageVersion('plotly')
#response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
#monthlist is jan-dec, each repeated 12 times
monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
#numlist is 12 months, repeated 12 times
numlist <- as.data.frame(rep((1:12), times = 12))
#monthnumlist puts month names and 1-12 together, which results in a vector of 144 that is the matrix of 12 by 12
monthnumlist <- as.data.frame(cbind(monthlist, numlist))
#renaming columns
colnames(monthnumlist) <- c("month", "monthcode")
#put the monthlist all together in one vector
monthnumlist$combined <- paste(monthnumlist$month, monthnumlist$monthcode, sep="")
#rename the vector
climmonth <- monthnumlist$combined
designmatrix <- matrix(NA, ncol=12, nrow=12)
#create an empty 144 vector to fill up with correlations between loss and climate
dmvector <- as.data.frame(rep(NA, times=144))
cl = 0
for (ppp in climmonth) {
cl = cl +1
#setwd("data/climate_matrices")
file1 <- read.csv(paste("data/climate_matrices/", state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv", sep=""))
climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))
#setwd("data/climate_correlations_summaries")
file2 <- as.data.frame(read.csv(paste("data/climate_correlation_summaries/", state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv", sep="")))
file2 <- subset(file2, state == state_var)
file2 <- subset(file2, county == county_var)
colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var )
climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
colnames(climatevar[3]) <- "zscore"
kor <- join(climatevar, file2, by = "year")
kor2 <- subset(kor, damagecause != "NA")
colnames(kor2)[3] <- "zscore"
# kor2[9] <- as.numeric(kor2$zscore)
kor3 <- cor(kor2[1], kor2[9])
#insert kor3 into designmatrix iteratively
dmvector[cl,] <- kor3
}
# Reshape the 144 lag/month correlations into a 12 x 12 matrix (filled by
# row), flip it vertically, and keep the 9 usable rows (rows 4:12).
dmvector <- as.data.frame(dmvector)
colnames(dmvector) <- "correlations"
dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
dmvector2 <- dmvector2[nrow(dmvector2):1, ]
dmvector3 <- dmvector2[4:12, ]
# Blank out the upper-right triangle of impossible lag/month combinations:
# row r keeps columns 1:(12 - r), so columns (13 - r):12 become NA.
for (r in 1:9) {
  dmvector3[r, (13 - r):12] <- NA
}
# Flatten rows 9..1 back into a single 108-value vector (same ordering the
# original nine-term c() produced) for the optimum-cell lookup below.
dmvector <- data.frame(dmvector = as.vector(t(dmvector3[9:1, ])))
# Pick the optimum matrix cell. The original nested chain takes the MINIMUM
# correlation for pr/rmin/rmax/fm100/fm1000/pdsi (variables presumably
# negatively associated with drought loss) and the MAXIMUM for
# tmmn/pet/tmmx. Directions preserved exactly; an unrecognized climate_var
# leaves dmv unset, as before.
if (climate_var %in% c('pr', 'rmin', 'rmax', 'fm100', 'fm1000', 'pdsi')) {
  dmv <- which.min(dmvector[1:108, 1])
} else if (climate_var %in% c('tmmn', 'pet', 'tmmx')) {
  dmv <- which.max(dmvector[1:108, 1])
}
# Map the winning vector position back to its month/lag label and render the
# lagged-correlation heatmap for pdsi.
dmv <- as.data.frame(dmv)
colnames(dmv)[1] <- "row"
# Attach a 1:144 row index to the month/lag lookup, then pull the label
# (e.g. "jul2") for the optimum cell.
monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
colnames(monthnumlist2)[1] <- "row"
monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
# Grid coordinates of the optimum cell (consumed by the commented-out
# overlay code elsewhere in the file; kept for parity).
cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row, ]
monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
lblcol <- c(9:1)
# Rounded copy of the correlation matrix, used as in-cell annotations.
dmvector3a <- round(dmvector3, digits = 2)
newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row, ])
# Re-apply the impossible-combination mask. dmvector3 is already masked and
# round() preserves NA, so this is a defensive no-op kept for parity with
# the original (which unrolled it over nine lines).
for (r in 1:9) {
  dmvector3a[r, (13 - r):12] <- NA
}
# Reversed ramp relative to the temperature sections: for PDSI the strongest
# (most negative) correlations render darkest.
my_palette <- colorRampPalette(c("darkred", "red", "darkorange", "orange", "yellow", "lightyellow"))(n = 16)
lwid = c(1.5,4)
lhei = c(1.5,4,1)
nba_heatmap_pdsi <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace= "none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "white", notecex = 2, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), lmat = rbind(c(0,3),c(2,1),c(0,4)), lwid = lwid, lhei = lhei, cellnote = dmvector3a)
#
# png(file = "/mnt/ceph/erichs/git/RFclimatePaper/figures/heatmap_pdsi.png", width=8,height=7,units="in",res=1200)
#
# nba_heatmap_tmmx<- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace= "none", labRow = rev(monthzzz), tracecol = "black", key = FALSE, notecol = "white", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), lmat = rbind(c(0,3),c(2,1),c(0,4)), lwid = lwid, lhei = lhei, cellnote = dmvector3a)
#
# dev.off
# nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=topo.colors(16), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))
In step 3, we generate maps of our climate-lagged correlations for each climate variable. Each county is labeled with the optimum monthly period that has the highest correlation between that climate variable and wheat/drought insurance loss (e.g., "jul2" = the two months previous to July, i.e., May/June/July combined), along with the correlation value. Each correlation value is reported as an absolute value of the correlation coefficient (R).
# --- Step 3: map the climate-lagged correlations by county. Unpacks the
# per-county correlation matrices and assembles them into a list of tables.
#dyn.load("/opt/modules/climatology/gdal/3.1.4/lib/libgdal.so.27")
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
# Map parameters: climate variable, loss response, and the ending month /
# lag count ("jul", 2 -> May/June/July) used to extract a single cell.
climate_var <- "pr"
predictor_var <- "cube_root_loss"
monthend <- "jul"
monthnumber <- 2
# NOTE(review): RColorBrewer is loaded twice (also above); harmless.
library(RColorBrewer)
library(dplyr)
#
# URL <- "https://raw.githubusercontent.com/erichseamon/RFclimatePaper/master/data/climate_correlations/climate_correlations.zip"
# destfile <- "/tmp/climate_correlations.zip"
# download.file(URL, destfile)
# outDir<-"/tmp/climate_correlations"
# unzip(destfile,exdir=outDir)
unzip("data/climate_correlations/climate_correlations.zip", exdir="data/climate_correlations/")
#setwd("data/climate_correlations/")
# The "loss" response is stored under a different file-name token.
if (predictor_var == "loss") {
predictor <- "crop_commodity_loss"
}else {
predictor <- predictor_var
}
# List the correlation CSVs, split their underscore-delimited names into
# fields, and keep only files for the selected climate variable (field 5).
files <- list.files(path = "data/climate_correlations/", pattern = predictor)
filey <- do.call(rbind, strsplit(files, '[_]'))
filey <- subset(filey, filey[,5] == climate_var)
colnames(filey) <- c("state", "county", "commodity", "damage", "climate", "crop1", "crop2", "response", "crop3")
filey <- as.data.frame(filey)
# Rebuild full paths from the parsed name fields and read every table.
data <- with(filey, paste("data/climate_correlations/", state, "_", county, "_", commodity, "_", damage, "_", climate, "_", crop1, "_", crop2, "_", response, "_", crop3, sep=""))
tables <- lapply(data, read.csv, header = TRUE)
tables <- lapply(tables, function(x) { x["X"] <- NULL; x }) #--remove first index row from each list
tables <- lapply(tables, function(x) dplyr::arrange(x, -row_number())) #--(flips matrix - puts jan as 1st row and sept as 9th row)
# Mask the impossible lag/month cells in every county table: row r keeps
# columns 1:(r + 2), so columns (r + 3):12 become NA (same triangle the
# original unrolled over nine assignments per table).
for (i in seq_along(tables)) {
  for (r in 1:9) {
    tables[[i]][r, (r + 3):12] <- NA
  }
}
# Numeric index (1-12) of the requested ending month.
monthly <- match(monthend, tolower(month.abb))
# Per-county optimum: for each county table, find the cell holding the
# strongest loss association and record its (row, column) position and
# value. Selection directions are preserved exactly from the original
# nine-branch nested chain: MINIMUM correlation for pr/rmin/pdsi, MAXIMUM
# for rmax/tmmx/tmin/fm100/fm1000/pet. An unrecognized climate_var leaves
# bestcounty undefined, as before.
if (climate_var %in% c('pr', 'rmin', 'pdsi', 'rmax', 'tmmx', 'tmin', 'fm100', 'fm1000', 'pet')) {
  optimum <- if (climate_var %in% c('pr', 'rmin', 'pdsi')) min else max
  bestcounty <- matrix(NA, nrow = 26, ncol = 3)
  for (i in seq_len(26)) {
    best_val <- optimum(tables[[i]], na.rm = TRUE)
    # which(..., arr.ind = TRUE) gives the matrix position(s) of the
    # optimum; only the first hit is kept, matching the original.
    best_pos <- which(tables[[i]] == best_val, arr.ind = TRUE)
    bestcounty[i, 1] <- best_pos[1, 1]
    bestcounty[i, 2] <- best_pos[1, 2]
    bestcounty[i, 3] <- best_val
  }
}
# Convert the optimum row index to a month abbreviation. NOTE(review): this
# assignment coerces the whole bestcounty matrix to character, which is why
# CORRELATION is converted back with as.numeric() further below.
bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
# MONTH = month label of the optimum row; ENDMONTH = optimum column index.
colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
#new
#!!!!!!fix-row by column, or number of months by ending month
# Extract the single user-requested cell (monthly row, monthnumber column)
# from each county table.
table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
colnames(table3) <- "correlations"
#combined <- do.call(rbind , tables)
table4 <- cbind(filey, table3)
#if (predictor_var == "loss") {
#predictor_var <- "crop_commodity_loss"
#}
# Keep county, commodity, damage, climate and the extracted correlation.
table5 <- table4[c(2:5,10)]
colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
# Unpack the three-state county shapefile and build the merged spatial layer
# carrying correlation values and optimum-month labels per county.
unzip("data/counties/threestate_palouse.zip", exdir="data/counties/")
# NOTE(review): readShapePoly() comes from the retired maptools package;
# migrating to sf::st_read() is recommended but would add a dependency.
counties <- readShapePoly('data/counties/threestate_palouse.shp',
  proj4string=CRS
  ("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
# Kootenai and Clearwater fall outside the study area.
counties <- counties[counties$NAME != "Kootenai",]
counties <- counties[counties$NAME != "Clearwater",]
counties2 <- merge(counties, table5, by = "NAME" )
counties3 <- merge(counties2, bestcounty2, by = "NAME")
# Combined label such as "jul2": optimum month plus ending-month index.
counties3$MONTHCOMBO <- paste(counties3$MONTH, counties3$ENDMONTH, sep="")
# bestcounty was coerced to character upstream; restore numeric values.
counties3$CORRELATION <- as.numeric(counties3$CORRELATION)
# Color scales over the correlation range (one descending, one ascending).
# Direct column access replaces the original eval(parse(text = ...))
# construction, which only ever evaluated to counties3$CORRELATION.
pal <- colorNumeric(colorRampPalette(c("red", "orange", "yellow", "lightyellow"))(11), na.color = "#ffffff",
                    domain = counties3$CORRELATION)
pal2 <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11), na.color = "#ffffff",
                     domain = counties3$CORRELATION)
#--
myLabelFormat = function(..., reverse_order = FALSE){
  # Legend-label helper: by default this simply forwards to leaflet's
  # labelFormat(); with reverse_order = TRUE it returns a formatter that
  # emits the legend cut points in descending order (flips the legend).
  if (!reverse_order) {
    return(labelFormat(...))
  }
  function(type = "numeric", cuts) {
    sort(cuts, decreasing = TRUE)
  }
}
#colorss = colorRampPalette(brewer.pal(11,"Spectral"))
#finalcol <- colorss(len <- length(counties3$CORRELATION))
#finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
#cellselect <- paste(monthend, monthnumber, sep="")
#par(mfrow=c(1,4))
#layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE))
#par(mar = c(1,1,1,1) + 0.1)
#plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
#added from ID
#corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
#text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre, sep=""), cex=1.5, col = "white", font = 2)
#--
# Bounding box of the merged county layer, used to fit the leaflet view.
exte <- extent(counties3)
library(htmltools)
# CSS for a centered, semi-transparent map title control.
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 24px;
}
"))
# Title control for the leaflet map (currently commented out in the map
# pipeline below).
title <- tags$div(
tag.map.title, HTML(paste("IPNW Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
)
# County centroids used to anchor the per-county text labels.
lat_long <- coordinates(counties3)
#labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
#counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
# Plain data.frame view of the spatial layer for building label strings.
counties3a <- data.frame(counties3)
labs <- lapply(seq(nrow(counties3a)), function(i) {
paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
counties3a[i, "MONTHCOMBO"])
})
map <- leaflet(data = counties3) %>%
addProviderTiles(providers$Hydda.Base) %>%
addProviderTiles(providers$Stamen.TonerLines) %>%
#addControl(title, position = "topleft", className="map-title") %>%
fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
'font-family'= 'serif',
'font-style'= 'bold',
'font-size' = '14px'
))) %>%
addLegend(pal = pal2, values = counties3$CORRELATION, labels = c("1", "2"), opacity = .5, title = paste("Correlation", " Matrix", sep="<br>"),
position = "bottomright", labFormat = labelFormat(transform = function(x) sort(x, decreasing = TRUE)))
map
# Dependencies for the spatial join and leaflet mapping steps.
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)

# This pass maps potential evapotranspiration (pet) against cube-root wheat
# loss for a July-ending, two-month window.
climate_var <- "pet"
predictor_var <- "cube_root_loss"
monthend <- "jul"
monthnumber <- 2

library(RColorBrewer)  # already attached above; kept to mirror the pipeline
library(dplyr)

# On disk, the plain "loss" response is filed as "crop_commodity_loss".
predictor <- if (identical(predictor_var, "loss")) "crop_commodity_loss" else predictor_var
# Collect the per-county correlation-matrix CSVs for the selected response,
# then keep only those for the selected climate variable (5th "_" token).
files <- list.files(path = "data/climate_correlations/", pattern = predictor)
filey <- do.call(rbind, strsplit(files, "[_]"))
filey <- subset(filey, filey[, 5] == climate_var)
colnames(filey) <- c("state", "county", "commodity", "damage", "climate",
                     "crop1", "crop2", "response", "crop3")
filey <- as.data.frame(filey)
# Reassemble the full relative paths from the filename tokens.
data <- with(filey, paste("data/climate_correlations/", state, "_", county, "_", commodity, "_", damage, "_", climate, "_", crop1, "_", crop2, "_", response, "_", crop3, sep=""))
tables <- lapply(data, read.csv, header = TRUE)
tables <- lapply(tables, function(x) { x["X"] <- NULL; x }) #--remove first index row from each list
tables <- lapply(tables, function(x) dplyr::arrange(x, -row_number())) #--(flips matrix - puts jan as 1st row and sept as 9th row)
# Mask the cells that correspond to impossible month windows (row r keeps
# only columns 1..(r+2)). Replaces nine hand-written assignments per table;
# seq_along() also removes the hard-coded county count of 26.
for (i in seq_along(tables)) {
  for (r in 1:9) {
    tables[[i]][r, (r + 3):12] <- NA
  }
}
# Numeric index (1-12) of the chosen ending month.
monthly <- match(monthend, tolower(month.abb))
# Locate each county's strongest climate/loss correlation cell. Variables
# with a negative association (pr, rmin, pdsi) take the minimum correlation;
# the remaining variables (rmax, tmmx, tmin, fm100, fm1000, pet) take the
# maximum. This replaces the original nine-level nested if/else ladder,
# whose branches repeated the identical loop body.
min_vars <- c("pr", "rmin", "pdsi")
max_vars <- c("rmax", "tmmx", "tmin", "fm100", "fm1000", "pet")
if (climate_var %in% c(min_vars, max_vars)) {
  extremum <- if (climate_var %in% min_vars) min else max
  # One row per county: start-month row, end-month column, correlation value.
  bestcounty <- matrix(NA, nrow = length(tables), ncol = 3)
  for (i in seq_along(tables)) {
    best_val <- extremum(tables[[i]], na.rm = TRUE)
    best_cell <- which(tables[[i]] == best_val, arr.ind = TRUE)
    bestcounty[i, 1] <- best_cell[1, 1]
    bestcounty[i, 2] <- best_cell[1, 2]
    bestcounty[i, 3] <- best_val
  }
}
# Convert the best start-month row index to a lowercase month abbreviation.
bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
# Per-county optimal window: NAME, start MONTH, ENDMONTH index, CORRELATION.
bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
#new
#!!!!!!fix-row by column, or number of months by ending month
# Fixed-window correlations: pull the (monthend, monthnumber) cell from each
# county's matrix. NOTE(review): per the "!!!!!!fix" note above, the row vs.
# column orientation (month vs. window length) should be verified.
table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
colnames(table3) <- "correlations"
#combined <- do.call(rbind , tables)
# Attach the fixed-window correlation to the filename metadata.
table4 <- cbind(filey, table3)
#if (predictor_var == "loss") {
#predictor_var <- "crop_commodity_loss"
#}
# Keep county, commodity, damage, climate, and correlation columns.
table5 <- table4[c(2:5,10)]
colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
#table5$STATE_NAME <- state.name[match(table5[,1],state.abb)]
# (The shapefile archive was previously downloaded from the RFclimatePaper
# GitHub repo into /tmp; it is now expected locally under data/counties/.)
# Extract and load the three-state (WA/OR/ID) Palouse county polygons.
unzip("data/counties/threestate_palouse.zip", exdir = "data/counties/")
# NOTE(review): readShapePoly() is from the retired maptools package;
# consider migrating to sf::st_read() when dependencies can change.
counties <- readShapePoly("data/counties/threestate_palouse.shp",
                          proj4string = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection <- CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
# Drop the two counties outside the 24-county iPNW study area.
counties <- counties[!(counties$NAME %in% c("Kootenai", "Clearwater")), ]
# Join the fixed-window correlations (table5) and the per-county optimal
# month-window results (bestcounty2) onto the polygons.
counties2 <- merge(counties, table5, by = "NAME")
counties3 <- merge(counties2, bestcounty2, by = "NAME")
# Label code shown on the map, e.g. "jun2" = start month + window length.
counties3$MONTHCOMBO <- paste0(counties3$MONTH, counties3$ENDMONTH)
counties3$CORRELATION <- as.numeric(counties3$CORRELATION)
# Color scale for the choropleth/legend. Direct column access replaces the
# original eval(parse(text = paste("counties3$", ...))) construct.
pal <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11),
                    na.color = "#ffffff", domain = counties3$CORRELATION)
#--
# ---- legacy base-graphics plotting path, kept for reference ----
#colorss = colorRampPalette(brewer.pal(11,"Spectral"))
#finalcol <- colorss(len <- length(counties3$CORRELATION))
#finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
#cellselect <- paste(monthend, monthnumber, sep="")
#par(mfrow=c(1,4))
#layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE))
#par(mar = c(1,1,1,1) + 0.1)
#plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
#added from ID
#corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
#text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre, sep=""), cex=1.5, col = "white", font = 2)
#--
# Bounding box of the study-area polygons; used to fit the map view below.
exte <- extent(counties3)
library(htmltools)
# CSS for a centered, semi-transparent map title control. The title div is
# built below, but the addControl() call that would show it is commented out.
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 24px;
}
"))
title <- tags$div(
tag.map.title, HTML(paste("IPNW Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
)
# County centroids for placing the text-only labels.
lat_long <- coordinates(counties3)
#labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
#counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
counties3a <- data.frame(counties3)
# One HTML label per county: rounded correlation over the month-window code.
labs <- lapply(seq(nrow(counties3a)), function(i) {
paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
counties3a[i, "MONTHCOMBO"])
})
# Choropleth of per-county correlations with centered text labels.
# NOTE(review): the Hydda and Stamen tile providers have been retired
# upstream — confirm these URLs still resolve or switch providers.
map <- leaflet(data = counties3) %>%
addProviderTiles(providers$Hydda.Base) %>%
addProviderTiles(providers$Stamen.TonerLines) %>%
#addControl(title, position = "topleft", className="map-title") %>%
fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
'font-family'= 'serif',
'font-style'= 'bold',
'font-size' = '14px'
))) %>%
# NOTE(review): `labels =` appears to be ignored by addLegend() when `pal`
# and `values` are supplied — confirm against the leaflet docs.
addLegend(pal = pal, values = counties3$CORRELATION, labels = c("1", "2"), opacity = .5, title = paste("Correlation", " Matrix", sep="<br>"),
position = "bottomright")
map
# Dependencies for the spatial join and leaflet mapping steps.
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)

# This pass maps maximum temperature (tmmx) against cube-root wheat loss for
# a July-ending, two-month window.
climate_var <- "tmmx"
predictor_var <- "cube_root_loss"
monthend <- "jul"
monthnumber <- 2

library(RColorBrewer)  # already attached above; kept to mirror the pipeline
library(dplyr)

# On disk, the plain "loss" response is filed as "crop_commodity_loss".
predictor <- if (identical(predictor_var, "loss")) "crop_commodity_loss" else predictor_var
# Collect the per-county correlation-matrix CSVs for the selected response,
# then keep only those for the selected climate variable (5th "_" token).
files <- list.files(path = "data/climate_correlations/", pattern = predictor)
filey <- do.call(rbind, strsplit(files, "[_]"))
filey <- subset(filey, filey[, 5] == climate_var)
colnames(filey) <- c("state", "county", "commodity", "damage", "climate",
                     "crop1", "crop2", "response", "crop3")
filey <- as.data.frame(filey)
# Reassemble the full relative paths from the filename tokens.
data <- with(filey, paste("data/climate_correlations/", state, "_", county, "_", commodity, "_", damage, "_", climate, "_", crop1, "_", crop2, "_", response, "_", crop3, sep=""))
tables <- lapply(data, read.csv, header = TRUE)
tables <- lapply(tables, function(x) { x["X"] <- NULL; x }) #--remove first index row from each list
tables <- lapply(tables, function(x) dplyr::arrange(x, -row_number())) #--(flips matrix - puts jan as 1st row and sept as 9th row)
# Mask the cells that correspond to impossible month windows (row r keeps
# only columns 1..(r+2)). Replaces nine hand-written assignments per table;
# seq_along() also removes the hard-coded county count of 26.
for (i in seq_along(tables)) {
  for (r in 1:9) {
    tables[[i]][r, (r + 3):12] <- NA
  }
}
# Numeric index (1-12) of the chosen ending month.
monthly <- match(monthend, tolower(month.abb))
# Locate each county's strongest climate/loss correlation cell. Variables
# with a negative association (pr, rmin, pdsi) take the minimum correlation;
# the remaining variables (rmax, tmmx, tmin, fm100, fm1000, pet) take the
# maximum. This replaces the original nine-level nested if/else ladder,
# whose branches repeated the identical loop body.
min_vars <- c("pr", "rmin", "pdsi")
max_vars <- c("rmax", "tmmx", "tmin", "fm100", "fm1000", "pet")
if (climate_var %in% c(min_vars, max_vars)) {
  extremum <- if (climate_var %in% min_vars) min else max
  # One row per county: start-month row, end-month column, correlation value.
  bestcounty <- matrix(NA, nrow = length(tables), ncol = 3)
  for (i in seq_along(tables)) {
    best_val <- extremum(tables[[i]], na.rm = TRUE)
    best_cell <- which(tables[[i]] == best_val, arr.ind = TRUE)
    bestcounty[i, 1] <- best_cell[1, 1]
    bestcounty[i, 2] <- best_cell[1, 2]
    bestcounty[i, 3] <- best_val
  }
}
# Convert the best start-month row index to a lowercase month abbreviation.
bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
# Per-county optimal window: NAME, start MONTH, ENDMONTH index, CORRELATION.
bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
#new
#!!!!!!fix-row by column, or number of months by ending month
# Fixed-window correlations: pull the (monthend, monthnumber) cell from each
# county's matrix. NOTE(review): per the "!!!!!!fix" note above, the row vs.
# column orientation (month vs. window length) should be verified.
table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
colnames(table3) <- "correlations"
#combined <- do.call(rbind , tables)
# Attach the fixed-window correlation to the filename metadata.
table4 <- cbind(filey, table3)
#if (predictor_var == "loss") {
#predictor_var <- "crop_commodity_loss"
#}
# Keep county, commodity, damage, climate, and correlation columns.
table5 <- table4[c(2:5,10)]
colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
#table5$STATE_NAME <- state.name[match(table5[,1],state.abb)]
# (The shapefile archive was previously downloaded from the RFclimatePaper
# GitHub repo into /tmp; it is now expected locally under data/counties/.)
# Extract and load the three-state (WA/OR/ID) Palouse county polygons.
unzip("data/counties/threestate_palouse.zip", exdir = "data/counties/")
# NOTE(review): readShapePoly() is from the retired maptools package;
# consider migrating to sf::st_read() when dependencies can change.
counties <- readShapePoly("data/counties/threestate_palouse.shp",
                          proj4string = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection <- CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
# Drop the two counties outside the 24-county iPNW study area.
counties <- counties[!(counties$NAME %in% c("Kootenai", "Clearwater")), ]
# Join the fixed-window correlations (table5) and the per-county optimal
# month-window results (bestcounty2) onto the polygons.
counties2 <- merge(counties, table5, by = "NAME")
counties3 <- merge(counties2, bestcounty2, by = "NAME")
# Label code shown on the map, e.g. "jun2" = start month + window length.
counties3$MONTHCOMBO <- paste0(counties3$MONTH, counties3$ENDMONTH)
counties3$CORRELATION <- as.numeric(counties3$CORRELATION)
# Color scale for the choropleth/legend. Direct column access replaces the
# original eval(parse(text = paste("counties3$", ...))) construct.
pal <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11),
                    na.color = "#ffffff", domain = counties3$CORRELATION)
#--
# ---- legacy base-graphics plotting path, kept for reference ----
#colorss = colorRampPalette(brewer.pal(11,"Spectral"))
#finalcol <- colorss(len <- length(counties3$CORRELATION))
#finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
#cellselect <- paste(monthend, monthnumber, sep="")
#par(mfrow=c(1,4))
#layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE))
#par(mar = c(1,1,1,1) + 0.1)
#plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
#added from ID
#corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
#text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre, sep=""), cex=1.5, col = "white", font = 2)
#--
# Bounding box of the study-area polygons; used to fit the map view below.
exte <- extent(counties3)
library(htmltools)
# CSS for a centered, semi-transparent map title control. The title div is
# built below, but the addControl() call that would show it is commented out.
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 24px;
}
"))
title <- tags$div(
tag.map.title, HTML(paste("IPNW Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
)
# County centroids for placing the text-only labels.
lat_long <- coordinates(counties3)
#labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
#counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
counties3a <- data.frame(counties3)
# One HTML label per county: rounded correlation over the month-window code.
labs <- lapply(seq(nrow(counties3a)), function(i) {
paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
counties3a[i, "MONTHCOMBO"])
})
# Choropleth of per-county correlations with centered text labels.
# NOTE(review): the Hydda and Stamen tile providers have been retired
# upstream — confirm these URLs still resolve or switch providers.
map <- leaflet(data = counties3) %>%
addProviderTiles(providers$Hydda.Base) %>%
addProviderTiles(providers$Stamen.TonerLines) %>%
#addControl(title, position = "topleft", className="map-title") %>%
fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
'font-family'= 'serif',
'font-style'= 'bold',
'font-size' = '14px'
))) %>%
# NOTE(review): `labels =` appears to be ignored by addLegend() when `pal`
# and `values` are supplied — confirm against the leaflet docs.
addLegend(pal = pal, values = counties3$CORRELATION, labels = c("1", "2"), opacity = .5, title = paste("Correlation", " Matrix", sep="<br>"),
position = "bottomright")
map
# Dependencies for the spatial join and leaflet mapping steps.
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)

# This pass maps the Palmer Drought Severity Index (pdsi) against cube-root
# wheat loss for a July-ending, two-month window.
climate_var <- "pdsi"
predictor_var <- "cube_root_loss"
monthend <- "jul"
monthnumber <- 2

library(RColorBrewer)  # already attached above; kept to mirror the pipeline
library(dplyr)

# On disk, the plain "loss" response is filed as "crop_commodity_loss".
predictor <- if (identical(predictor_var, "loss")) "crop_commodity_loss" else predictor_var
# Collect the per-county correlation-matrix CSVs for the selected response,
# then keep only those for the selected climate variable (5th "_" token).
files <- list.files(path = "data/climate_correlations/", pattern = predictor)
filey <- do.call(rbind, strsplit(files, "[_]"))
filey <- subset(filey, filey[, 5] == climate_var)
colnames(filey) <- c("state", "county", "commodity", "damage", "climate",
                     "crop1", "crop2", "response", "crop3")
filey <- as.data.frame(filey)
# Reassemble the full relative paths from the filename tokens.
data <- with(filey, paste("data/climate_correlations/", state, "_", county, "_", commodity, "_", damage, "_", climate, "_", crop1, "_", crop2, "_", response, "_", crop3, sep=""))
tables <- lapply(data, read.csv, header = TRUE)
tables <- lapply(tables, function(x) { x["X"] <- NULL; x }) #--remove first index row from each list
tables <- lapply(tables, function(x) dplyr::arrange(x, -row_number())) #--(flips matrix - puts jan as 1st row and sept as 9th row)
# Mask the cells that correspond to impossible month windows (row r keeps
# only columns 1..(r+2)). Replaces nine hand-written assignments per table;
# seq_along() also removes the hard-coded county count of 26.
for (i in seq_along(tables)) {
  for (r in 1:9) {
    tables[[i]][r, (r + 3):12] <- NA
  }
}
# Numeric index (1-12) of the chosen ending month.
monthly <- match(monthend, tolower(month.abb))
# Locate each county's strongest climate/loss correlation cell. Variables
# with a negative association (pr, rmin, pdsi) take the minimum correlation;
# the remaining variables (rmax, tmmx, tmin, fm100, fm1000, pet) take the
# maximum. This replaces the original nine-level nested if/else ladder,
# whose branches repeated the identical loop body.
min_vars <- c("pr", "rmin", "pdsi")
max_vars <- c("rmax", "tmmx", "tmin", "fm100", "fm1000", "pet")
if (climate_var %in% c(min_vars, max_vars)) {
  extremum <- if (climate_var %in% min_vars) min else max
  # One row per county: start-month row, end-month column, correlation value.
  bestcounty <- matrix(NA, nrow = length(tables), ncol = 3)
  for (i in seq_along(tables)) {
    best_val <- extremum(tables[[i]], na.rm = TRUE)
    best_cell <- which(tables[[i]] == best_val, arr.ind = TRUE)
    bestcounty[i, 1] <- best_cell[1, 1]
    bestcounty[i, 2] <- best_cell[1, 2]
    bestcounty[i, 3] <- best_val
  }
}
# Convert the best start-month row index to a lowercase month abbreviation.
bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
# Per-county optimal window: NAME, start MONTH, ENDMONTH index, CORRELATION.
bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
#new
#!!!!!!fix-row by column, or number of months by ending month
# Fixed-window correlations: pull the (monthend, monthnumber) cell from each
# county's matrix. NOTE(review): per the "!!!!!!fix" note above, the row vs.
# column orientation (month vs. window length) should be verified.
table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
colnames(table3) <- "correlations"
#combined <- do.call(rbind , tables)
# Attach the fixed-window correlation to the filename metadata.
table4 <- cbind(filey, table3)
#if (predictor_var == "loss") {
#predictor_var <- "crop_commodity_loss"
#}
# Keep county, commodity, damage, climate, and correlation columns.
table5 <- table4[c(2:5,10)]
colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
#table5$STATE_NAME <- state.name[match(table5[,1],state.abb)]
# (The shapefile archive was previously downloaded from the RFclimatePaper
# GitHub repo into /tmp; it is now expected locally under data/counties/.)
# Extract and load the three-state (WA/OR/ID) Palouse county polygons.
unzip("data/counties/threestate_palouse.zip", exdir = "data/counties/")
# NOTE(review): readShapePoly() is from the retired maptools package;
# consider migrating to sf::st_read() when dependencies can change.
counties <- readShapePoly("data/counties/threestate_palouse.shp",
                          proj4string = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection <- CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
# Drop the two counties outside the 24-county iPNW study area.
counties <- counties[!(counties$NAME %in% c("Kootenai", "Clearwater")), ]
# Join the fixed-window correlations (table5) and the per-county optimal
# month-window results (bestcounty2) onto the polygons.
counties2 <- merge(counties, table5, by = "NAME")
counties3 <- merge(counties2, bestcounty2, by = "NAME")
# Label code shown on the map, e.g. "jun2" = start month + window length.
counties3$MONTHCOMBO <- paste0(counties3$MONTH, counties3$ENDMONTH)
counties3$CORRELATION <- as.numeric(counties3$CORRELATION)
# Color scales for the choropleth/legend (pdsi correlations are negative, so
# the fill and legend ramps run in opposite directions). Direct column access
# replaces the original eval(parse(text = paste("counties3$", ...))) construct.
pal <- colorNumeric(colorRampPalette(c("red", "orange", "yellow", "lightyellow"))(11),
                    na.color = "#ffffff", domain = counties3$CORRELATION)
pal2 <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11),
                     na.color = "#ffffff", domain = counties3$CORRELATION)
# Legend label formatter factory. With reverse_order = TRUE it returns a
# formatter that presents the legend cut points in descending order;
# otherwise it defers to leaflet::labelFormat() with the supplied arguments.
myLabelFormat <- function(..., reverse_order = FALSE) {
  if (!reverse_order) {
    return(labelFormat(...))
  }
  function(type = "numeric", cuts) {
    cuts <- sort(cuts, decreasing = TRUE)
  }
}
#--
# ---- legacy base-graphics plotting path, kept for reference ----
#colorss = colorRampPalette(brewer.pal(11,"Spectral"))
#finalcol <- colorss(len <- length(counties3$CORRELATION))
#finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
#cellselect <- paste(monthend, monthnumber, sep="")
#par(mfrow=c(1,4))
#layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE))
#par(mar = c(1,1,1,1) + 0.1)
#plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
#added from ID
#corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
#text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre, sep=""), cex=1.5, col = "white", font = 2)
#--
# Bounding box of the study-area polygons; used to fit the map view below.
exte <- extent(counties3)
library(htmltools)
# CSS for a centered, semi-transparent map title control. The title div is
# built below, but the addControl() call that would show it is commented out.
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 24px;
}
"))
title <- tags$div(
tag.map.title, HTML(paste("IPNW Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
)
# County centroids for placing the text-only labels.
lat_long <- coordinates(counties3)
#labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
#counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
counties3a <- data.frame(counties3)
# One HTML label per county: rounded correlation over the month-window code.
labs <- lapply(seq(nrow(counties3a)), function(i) {
paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
counties3a[i, "MONTHCOMBO"])
})
# Choropleth of per-county correlations with centered text labels.
# NOTE(review): the Hydda and Stamen tile providers have been retired
# upstream — confirm these URLs still resolve or switch providers.
map <- leaflet(data = counties3) %>%
addProviderTiles(providers$Hydda.Base) %>%
addProviderTiles(providers$Stamen.TonerLines) %>%
#addControl(title, position = "topleft", className="map-title") %>%
fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
'font-family'= 'serif',
'font-style'= 'bold',
'font-size' = '14px'
))) %>%
# NOTE(review): `labels =` appears to be ignored by addLegend() when `pal`
# and `values` are supplied — confirm against the leaflet docs.
addLegend(pal = pal2, values = counties3$CORRELATION, labels = c("1", "2"), opacity = .5, title = paste("Correlation", " Matrix", sep="<br>"),
position = "bottomright", labFormat = labelFormat(transform = function(x) sort(x, decreasing = TRUE)))
map
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
# Upper-panel function for pairs(): prints |r| between x and y in the panel
# center, scaled so stronger correlations render larger.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
  # Save and restore the coordinate system via the named list that par()
  # returns when setting a parameter. The original's on.exit(par(usr))
  # passed an unnamed numeric vector, which cannot restore "usr".
  old_par <- par(usr = c(0, 1, 0, 1))
  on.exit(par(old_par), add = TRUE)
  # NOTE(review): cor() here propagates NA; add use = "complete.obs" if the
  # plotted columns may contain missing values.
  r <- abs(cor(x, y))
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) cex.cor <- 0.8 / strwidth(txt)
  text(0.5, 0.5, txt, cex = cex.cor * r)
}
var_commodity <- "WHEAT"
var_damage <- "Drought"
#load wheat pricing
#
# URL <- "https://raw.githubusercontent.com/erichseamon/RFclimatePaper/master/data/wheat_prices/wheat_prices_1989_2015.csv"
# destfile <- "/tmp/wheat_prices_1989_2015.csv"
# download.file(URL, destfile)
# outDir<-"/tmp/"
# #unzip(destfile,exdir=outDir)
unzip("data/wheat_prices/wheat_prices_1989_2015.csv", exdir="data/wheat_prices/")
wheatprice <- read.csv("data/wheat_prices/wheat_prices_1989_2015.csv", header = TRUE, strip.white = TRUE, sep = ",")
wheatprice <- wheatprice[-1]
colnames(wheatprice) <- c("year", "price")
#--accessing output of design matrix/time lag data based on monthly selection from dashboard runs
#
# URL <- "https://raw.githubusercontent.com/erichseamon/RFclimatePaper/master/data/climate_outputs/climate_outputs.zip"
# destfile <- "/tmp/climate_outputs.zip"
# download.file(URL, destfile)
# outDir<-"/tmp/climate_outputs"
# unzip(destfile,exdir=outDir)
unzip("data/climate_outputs/climate_outputs.zip", exdir="data/climate_outputs/")
# Read one climate-output CSV and rename column 9 as the z-score of the
# climate variable held in column 2 (e.g. "pr" -> "pr_zscore"). This replaces
# sixteen copy-pasted read.csv/colnames pairs.
read_clim <- function(fname) {
  df <- read.csv(paste0("data/climate_outputs/", fname))
  colnames(df)[9] <- paste0(colnames(df)[2], "_zscore")
  df
}
var1 <- read_clim("pr_jun2_cube_root_loss_climatecorrelation.csv")
var2 <- read_clim("pet_jul3_cube_root_loss_climatecorrelation.csv")
var3 <- read_clim("tmmx_jul2_cube_root_loss_climatecorrelation.csv")
var4 <- read_clim("pr_jun2_cube_root_acres_climatecorrelation.csv")
var5 <- read_clim("pet_jun2_loss_per_acre_climatecorrelation.csv")
var6 <- read_clim("tmmx_jun1_cube_root_acres_climatecorrelation.csv")
var7 <- read_clim("pr_sep5_loss_climatedata.csv")
var8 <- read_clim("pet_sep5_loss_climatedata.csv")
var9 <- read_clim("tmmx_jul2_loss_climatedata.csv")
var9x <- read_clim("fm100_jul3_cube_root_loss_climatedata.csv")
var10x <- read_clim("fm1000_aug2_cube_root_loss_climatedata.csv")
var11x <- read_clim("pdsi_sep4_cube_root_loss_climatedata.csv")
var12x <- read_clim("pdsi_sep4_cube_root_loss_climatecorrelation.csv")
var7a <- read_clim("pr_sep5_loss_climatecorrelation.csv")
var8a <- read_clim("pet_sep5_loss_climatecorrelation.csv")
var9a <- read_clim("tmmx_jul2_loss_climatecorrelation.csv")
# Combine the per-variable tables into the analysis frames.
data1 <- cbind(var1, var2[9], var3[9])
data2 <- cbind(var1[1:6], var2[2], var3[2])
data3 <- cbind(var4[1:6], var5[2], var6[2])
data3 <- plyr::join(data3, wheatprice, by = "year")
data4 <- cbind(var7[1:6], var8[2], var9[2], var9x[2], var10x[2], var11x[2], var12x[3])
#data4$prpet <- data4$pr / data4$pet
# Attach the loss-correlation table and wheat prices, then drop incomplete rows.
data4a <- dplyr::left_join(data4, var7a, by = c("year" = "year", "county" = "county"))
data4a <- dplyr::left_join(data4a, wheatprice, by = c("year"))
data4aa <- na.omit(data4a)
colnames(data4aa) <- c("X", "pr", "year", "pr_zscore", "damagecause", "county", "pet", "tmmx", "fm100", "fm1000", "pdsi", "cube_loss", "X.y", "pr2", "loss", "state", "commodity", "matrixnumber", "clim_zscore", "loss_zscore", "price")
# Aridity index: precipitation over potential evapotranspiration.
data4aa$prpet <- data4aa$pr / data4aa$pet
data4aa <- subset(data4aa, , c(year, damagecause, county, commodity, state, matrixnumber, cube_loss, pr, pdsi, pet, tmmx, fm100, fm1000, price, prpet))
#write.csv(data4aa, file = "/dmine/data/USDA/agmesh-scenarios/Allstates/summaries/lag_palouse1.csv")
# Center/scale the main predictors for comparable plotting and modeling.
data4aa$pr_scaled <- scale(data4aa$pr, scale = TRUE, center = TRUE)
data4aa$tmmx_scaled <- scale(data4aa$tmmx, scale = TRUE, center = TRUE)
data4aa$pet_scaled <- scale(data4aa$pet, scale = TRUE, center = TRUE)
data4aa$pdsi_scaled <- scale(data4aa$pdsi, scale = TRUE, center = TRUE)
data4aa$price_scaled <- scale(data4aa$price, scale = TRUE, center = TRUE)
#percentage acreage by county and year, per state
library(plotrix)
library(ggplot2)
library(gridExtra)
# Load the per-state yearly climatology summaries (2001-2015), replacing 45
# copy-pasted read.csv calls. Each state-year data frame is still assigned to
# its original global name (e.g. ID2001, WA2015) for parity with the old code.
# States are processed in the original order (ID, WA, OR) so the final value
# of ziggy.df matches the original script.
clim_states <- c(ID = "Idaho", WA = "Washington", OR = "Oregon")
for (abbr in names(clim_states)) {
  for (yr in 2001:2015) {
    fname <- sprintf("data/climatology/%s_%d_ipnw_summary", clim_states[[abbr]], yr)
    assign(paste0(abbr, yr), read.csv(fname, header = TRUE, strip.white = TRUE, sep = ","))
  }
  ziggy.df <- do.call(rbind, mget(paste0(abbr, 2001:2015)))
  ##remove month (column 17)
  ziggy.df <- ziggy.df[-17]
  # Sum within each county-year, then average those sums across years,
  # yielding one climatology row per county (ID1 / WA1 / OR1).
  county_year <- aggregate(. ~ countyfips + year, ziggy.df, sum)
  assign(paste0(abbr, 1), aggregate(. ~ countyfips, county_year, mean))
}
# County name/state lookup keyed by 5-digit FIPS.
countiesfips <- read.csv("data/counties/counties_fips.csv",
                         header = TRUE, strip.white = TRUE, sep = ",")
colnames(countiesfips) <- c("countyfips", "county", "state")
# Zero-pad FIPS codes to 5 characters so they match the climatology keys.
countiesfips$countyfips <- sprintf("%05d", countiesfips$countyfips)
OR2 <- merge(countiesfips, OR1, by = "countyfips")
ID2 <- merge(countiesfips, ID1, by = "countyfips")
WA2 <- merge(countiesfips, WA1, by = "countyfips")
# Load the raw RMA insurance-loss files (pipe-delimited, no header), one per
# year. Each year's table keeps its original global name (rma1989 ... rma2015).
for (yr in 1989:2015) {
  assign(paste0("rma", yr),
         read.csv(sprintf("data/RMA_originaldata/%d.txt", yr),
                  sep = "|", header = FALSE, strip.white = TRUE))
}
#load individual files from 1989 to 2000
RMA_1989 <- do.call(rbind, mget(paste0("rma", 1989:2000)))
#filter columns (the 1989-2000 layout has no acres column)
RMA_1989 <- RMA_1989[, c(1, 3, 5, 7, 12, 13, 14, 15)]
#set column names
colnames(RMA_1989) <- c("year", "state", "county", "commodity", "damagecause", "monthcode", "month", "loss")
#load individual files from 2001 to 2015
RMA <- do.call(rbind, mget(paste0("rma", 2001:2015)))
#filter columns (2001-2015 layout adds acres at position 15, loss at 16)
RMA <- RMA[, c(1, 3, 5, 7, 12, 13, 14, 15, 16)]
#set column names
colnames(RMA) <- c("year", "state", "county", "commodity", "damagecause", "monthcode", "month", "acres", "loss")
#subset 2001 to 2015 for ID, WA, and OR
RMA_PNW <- subset(RMA, state %in% c("ID", "OR", "WA"))
#subset 1989 to 2000 for ID, WA, and OR
RMA_PNW_1989 <- subset(RMA_1989, state %in% c("ID", "OR", "WA"))
#temp = list.files(pattern = paste("Idaho", "_summary", sep=""))
#myfiles = lapply(temp, read.csv, header = TRUE)
#ziggy.df <- do.call(rbind , myfiles)
RMA_PNW <- as.data.frame(RMA_PNW)
RMA_PNW$county <- as.character(RMA_PNW$county)
RMA_PNW$damagecause <- as.character(RMA_PNW$damagecause)
# Snapshot taken before whitespace trimming (kept for parity with original).
xrange <- RMA_PNW
# The fixed-width source pads fields with spaces; trim before string matching.
RMA_PNW$commodity <- trimws(RMA_PNW$commodity)
RMA_PNW$county <- trimws(RMA_PNW$county)
RMA_PNW$damagecause <- trimws(RMA_PNW$damagecause)
ID_RMA_PNW <- subset(RMA_PNW, state == "ID")
OR_RMA_PNW <- subset(RMA_PNW, state == "OR")
WA_RMA_PNW <- subset(RMA_PNW, state == "WA")
#--OR / WA / ID loss summaries
# Build one state's wheat/drought loss tables; previously this logic was
# triplicated verbatim for OR, WA, and ID.
#   rma_state: trimmed RMA claims for one state (ID/OR/WA subset)
#   clim: county climatology means joined to FIPS (its 4th column is dropped
#         before merging, as in the original)
# Returns a named list of every intermediate table so the original global
# names can be re-created below.
state_loss_tables <- function(rma_state, clim) {
  # All claims for the commodity of interest, regardless of damage cause.
  loss_commodity <- subset(rma_state, commodity == var_commodity)
  #--acres for all damage causes aggregated
  loss_all <- aggregate(loss_commodity$acres,
                        by = list(loss_commodity$year, loss_commodity$county, loss_commodity$state),
                        FUN = "sum")
  colnames(loss_all) <- c("year", "county", "state", "all_damagecause_acres")
  # Claims restricted to the damage cause of interest (drought).
  loss1 <- subset(rma_state, commodity == var_commodity & damagecause == var_damage)
  loss2 <- aggregate(loss1$loss, by = list(loss1$year, loss1$county, loss1$state), FUN = "sum")
  loss2_acres <- aggregate(loss1$acres, by = list(loss1$year, loss1$county, loss1$state), FUN = "sum")
  colnames(loss2) <- c("year", "county", "state", "loss")
  colnames(loss2_acres) <- c("year", "county", "state", "acres")
  loss2$acres <- loss2_acres$acres
  loss2$lossperacre <- loss2$loss / loss2$acres
  # Attach climatology, then the all-cause acreage totals.
  loss_climate <- merge(loss2, clim[-4], by = c("county", "state"))
  loss_climate_2 <- merge(loss_all, loss_climate, by = c("year", "county", "state"))
  list(commodity = loss_commodity, all = loss_all, loss1 = loss1,
       loss2 = loss2, loss2_acres = loss2_acres,
       climate = loss_climate, climate_2 = loss_climate_2)
}
# Re-expose each state's intermediates under their original global names.
or_tabs <- state_loss_tables(OR_RMA_PNW, OR2)
OR_loss_commodity <- or_tabs$commodity
OR_loss_all <- or_tabs$all
OR_loss1 <- or_tabs$loss1
OR_loss2 <- or_tabs$loss2
OR_loss2_acres <- or_tabs$loss2_acres
OR_loss_climate <- or_tabs$climate
OR_loss_climate_2 <- or_tabs$climate_2
#-WA
wa_tabs <- state_loss_tables(WA_RMA_PNW, WA2)
WA_loss_commodity <- wa_tabs$commodity
WA_loss_all <- wa_tabs$all
WA_loss1 <- wa_tabs$loss1
WA_loss2 <- wa_tabs$loss2
WA_loss2_acres <- wa_tabs$loss2_acres
WA_loss_climate <- wa_tabs$climate
WA_loss_climate_2 <- wa_tabs$climate_2
#-ID
id_tabs <- state_loss_tables(ID_RMA_PNW, ID2)
ID_loss_commodity <- id_tabs$commodity
ID_loss_all <- id_tabs$all
ID_loss1 <- id_tabs$loss1
ID_loss2 <- id_tabs$loss2
ID_loss2_acres <- id_tabs$loss2_acres
ID_loss_climate <- id_tabs$climate
ID_loss_climate_2 <- id_tabs$climate_2
merged_iPNW <- rbind(ID_loss_climate_2, WA_loss_climate_2, OR_loss_climate_2)
# Fraction of the commodity's claimed acreage attributable to drought.
merged_iPNW$pct_acreage <- merged_iPNW$acres / merged_iPNW$all_damagecause_acres
#
#--plotting results of individual variables
pairs(cube_loss ~ pr_scaled + tmmx_scaled + pet_scaled + pdsi_scaled, data = data4aa,
      col = as.factor(data4aa$state),
      lower.panel = panel.smooth, upper.panel = panel.cor, main = "initial pairs plot")
par(mar = c(12.7, 6.1, 4, 2.1), family = 'serif', mgp = c(4, 1, 0), las = 0)
data4aaaa <- data4aa
# One scatter + loess panel of cube-root loss against a climate variable,
# colored by state, point size mapped to wheat price, shared serif theme.
# xvar is the column name as a string (accessed via the .data pronoun).
# Previously this 3-line ggplot chain was repeated verbatim five times.
loss_scatter <- function(df, xvar, xlab) {
  ggplot(df, aes(.data[[xvar]], cube_loss, color = state)) +
    geom_point(aes(size = price)) +
    stat_smooth(aes(group = 1), method = "loess") +
    labs(x = xlab, y = "Cube root loss ($)") +
    theme(axis.title.y = element_text(family = "Serif", size = 16)) +
    theme(axis.title.x = element_text(family = "Serif", size = 16),
          axis.text.x = element_text(size = rel(1.5), hjust = 1, family = "Serif"),
          axis.text.y = element_text(size = rel(1.5), hjust = 1, family = "Serif"))
}
loss_scatter(data4aaaa, "pet_scaled", "Scaled Potential Evapotranspiration (mm)")
loss_scatter(data4aaaa, "pr_scaled", "Scaled Precipitation (mm)")
loss_scatter(data4aaaa, "tmmx_scaled", "Scaled Max Temperature (C)")
loss_scatter(data4aaaa, "pdsi_scaled", "Scaled PDSI")
loss_scatter(data4aaaa, "prpet", "Aridity Index (PR / PET)")
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
library(party)
# Make big tree
#form <- as.formula(loss_zscore ~ pr_zscore + tmmx_zscore + pet_zscore)
#form2 <- as.formula(cube_loss ~ pr + tmmx + pet + pdsi + price)
#form2a <- as.formula(loss ~ pr + tmmx + pet + pdsi + price)
#form2b <- as.formula(cube_loss ~ pr.x + tmmx.x + pet.x + pdsi.x + price)
#form3 <- as.formula(pct_acreage ~ pr + tmmx + pet + pdsi + sph + srad + vs + th + erc + fm100 + fm1000 + price)
#form3 <- as.formula(pct_acreage ~ pr + tmmx + pet + pdsi + price)
#form3a <- as.formula(pct_acreage ~ pr + tmmx + pet + pdsi + sph + srad + vs + th + erc + fm100 + fm1000 + price)
# Join the drought-acreage summaries onto the climate/loss frame.
data44a <- data.frame(data4aaaa)
data44a <- merge(merged_iPNW, data44a, by = c("state", "county", "year"))
# Identifier columns kept aside for later labeling.
data5a_statecountyear <- data.frame(state = data44a$state,
                                    county = data44a$county,
                                    year = data44a$year)
# Kelvin to (approximate) Celsius.
data44a$tmmx.x <- data44a$tmmx.x - 273
# Modeling frame: scaled climate predictors, loss responses, and identifiers.
data5a <- data.frame(year = data44a$year,
                     cube_loss = data44a$cube_loss,
                     loss = data44a$loss,
                     pr = data44a$pr_scaled,
                     tmmx = data44a$tmmx_scaled,
                     fm100 = data44a$fm100.x,
                     fm1000 = data44a$fm1000.x,
                     pdsi = data44a$pdsi_scaled,
                     erc = data44a$erc,
                     srad = data44a$srad,
                     vs = data44a$vs,
                     sph = data44a$sph,
                     th = data44a$th,
                     pet = data44a$pet_scaled,
                     pct_acreage = data44a$pct_acreage,
                     price = data44a$price_scaled,
                     state = data44a$state)
# Log-transform seasonal loss for the loss models.
data5a$loss <- log10(data5a$loss)
# Reduced frame used by the seasonal-loss model.
data5ab <- data.frame(loss = data5a$loss,
                      pr = data5a$pr,
                      tmmx = data5a$tmmx,
                      pdsi = data5a$pdsi,
                      pet = data5a$pet,
                      price = data5a$price)
#data5ab$loss <- log10(data5ab$loss)
# load libraries
library(caret)
library(rpart)
# define training control: 10-fold repeated cross-validation, keeping the
# fold-level predictions so learning curves can be derived later.
train_control<- trainControl(method="repeatedcv", number=10, savePredictions = TRUE)
data5b <- data.frame(data5a)
data5b <- na.omit(data5b)
data5ab <- data.frame(data5ab)
data5ab <- na.omit(data5ab)
#set up train and test for two model datasets: 1) pct acreage and 2) seasonal loss
set.seed(9992)
#trainIndex <- sample(1:nrow(data5b), 0.8 * nrow(data5b))
trainIndex <- createDataPartition(data5b$pct_acreage, p = .75, list = FALSE)
train <- data5b[trainIndex,]
test <- data5b[-trainIndex,]
trainIndex_loss <- caret::createDataPartition(data5ab$loss, p = .75, list = FALSE)
train_loss <- data5ab[trainIndex_loss,]
test_loss <- data5ab[-trainIndex_loss,]
#set the range of mtry for random forest
rf_grid <- expand.grid(mtry = 2:5) # you can put different values for mtry
#model setup
#climate vs pct drought acreage model
form3 <- as.formula(pct_acreage ~ pr + tmmx + pet + pdsi + price)
# BUG FIX: randomForest's argument is `ntree`, not `ntrees`. The misspelled
# `ntrees` was silently swallowed by `...`, so every forest below actually
# ran with the default 500 trees regardless of the value supplied (the
# intended 1000-tree loss model included).
model<- caret::train(form3, data=train, trControl=train_control, importance=T, method="rf", tuneGrid = rf_grid, ntree = 500)
model_singular <- caret::train(form3, data=train, trControl=train_control, method="rpart")
model_loss_all_acreage <- caret::train(form3, data=data5b, trControl=train_control, method="rf", ntree = 500, tuneGrid = rf_grid, importance = T)
#climate vs seasonal loss ($)
form2a <- as.formula(loss ~ pr + tmmx + pet + pdsi + price)
model_loss <- caret::train(form2a, data=train_loss, trControl=train_control, method="rf", tuneGrid = rf_grid, ntree = 1000, importance = T)
model_loss_all <- caret::train(form2a, data=data5b, trControl=train_control, method="rf", ntree = 500, tuneGrid = rf_grid, importance = T)
#cforest_model_loss <- cforest(form2a,
# data = train_loss,
# controls=cforest_unbiased(ntree=2000, mtry=5))
#lattice::densityplot(model_loss,
# adjust = 1.25)
# BUG FIX: `$err.rate` exists only on classification randomForest objects;
# model_loss is a regression forest, whose OOB error curve is stored in
# `$mse`. Indexing the NULL err.rate with [, 1] would error out.
tree_num <- which(model_loss$finalModel$mse == min(model_loss$finalModel$mse))
# Learning-curve data for the pct-acreage model: re-train on growing
# fractions of the training data and record train vs. resampling RMSE.
lda_data <- learning_curve_dat(dat = model$trainingData,
outcome = ".outcome",
## `train` arguments:
metric = "RMSE",
trControl = train_control,
method = "rf")
## Training for 10% (n = 26)
## Training for 20% (n = 52)
## Training for 30% (n = 79)
## Training for 40% (n = 105)
## Training for 50% (n = 132)
## Training for 60% (n = 158)
## Training for 70% (n = 184)
## Training for 80% (n = 211)
## Training for 90% (n = 237)
## Training for 100% (n = 264)
pts <- pretty(lda_data$RMSE)
#pts <- c(0,0.1, 0.2, 0.3, 0.4)
# Relabel the resampling curve for the plot legend.
lda_data$Data[lda_data$Data == "Resampling"] <- c("Validation")
ggplot(lda_data, aes(x = Training_Size, y = RMSE, color = Data)) +
  # Use the string form of the smoother name, matching the other plots.
  geom_smooth(method = "loess", span = .8) +
  # BUG FIX: legend.position was "bottom0" (typo), which is not a valid
  # theme value; corrected to "bottom".
  theme_bw() + ggtitle("Random Forest Learning Curves: Train vs. Validation") + theme(axis.title.y = element_text(family = "Serif", size=18), axis.title.x = element_text(family = "Serif", size = 18), axis.text.x = element_text(size=rel(1.9), angle = 90, hjust = 1, family = "Serif"), axis.text.y = element_text(size=rel(1.9), hjust = 1, family = "Serif")) + theme(plot.title = element_text(family = "Serif", vjust = 2)) + theme(legend.text=element_text(family = "Serif", size=14)) + theme(legend.title=element_text(family = "Serif", size=16, face = "bold")) + theme(plot.title = element_text(size=24, face = "bold")) + scale_y_continuous(labels = pts, breaks = pts ) + xlab("Training Size") + ylab("RMSE") + theme(legend.position="bottom") + scale_fill_discrete(name = "Legend")
# Best (minimum) OOB RMSE of the seasonal-loss forest and the tree count at
# which it occurred.
sqrt(model_loss$finalModel$mse[which.min(model_loss$finalModel$mse)])
## [1] 0.720125
which.min(model_loss$finalModel$mse)
## [1] 48
# Permutation variable importance from the seasonal-loss random forest,
# sorted descending and drawn as a horizontal bar chart.
importance <- varImp(model_loss, scale=FALSE)
importance2 <- importance$importance
importance2$variable <- rownames(importance2)
importance2 <- importance2[order(-importance2$Overall),]
par(mar=c(6, 7, 2, 3), family = 'serif', mgp=c(5, 1, 0), las=0)
barplot(sort(importance2$Overall), horiz = TRUE, col = "lightblue", ylab = "predictor variables", xlab = "% importance", cex.lab = 1.7, las = 2, cex.axis = 1.8, cex.names = 1.8, names.arg = rev(importance2$variable))
#NRMSE
# Normalized root-mean-square error between observed and predicted vectors.
#   obs, pred: numeric vectors of equal length
#   type: normalization denominator -- "sd" (default), "mean",
#         "maxmin" (range), or "iq" (interquartile range)
# Returns the NRMSE rounded to 3 decimal places.
# BUG FIX: the original only printed "Wrong type!" for an invalid `type` and
# then crashed with "object 'nrmse' not found"; match.arg now raises a clear
# error up front (and accepts unambiguous abbreviations).
nrmse_func <- function(obs, pred, type = "sd") {
  type <- match.arg(type, c("sd", "mean", "maxmin", "iq"))
  rmse <- sqrt(mean((obs - pred)^2))
  denom <- switch(type,
    sd = sd(obs),
    mean = mean(obs),
    maxmin = max(obs) - min(obs),
    iq = quantile(obs, 0.75) - quantile(obs, 0.25))
  round(rmse / denom, 3)
}
# Ensemble: fit several model families on the full seasonal-loss dataset
# under the shared cross-validation scheme for comparison.
library(caretEnsemble)
#algorithmList <- c('gbm', 'rpart', 'ctree', 'rf', 'HYFIS', 'FS.HGD', 'ANFIS' )
algorithmList <- c('gbm', 'rpart', 'ctree', 'rf')
# Fixed seed so the resampling folds are reproducible across runs.
set.seed(100)
form2 <- as.formula(loss ~ pr + tmmx + pet + pdsi + price)
models <- caretList(form2, data=data5b, trControl=train_control, methodList=algorithmList)
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8895 -nan 0.1000 0.0408
## 2 0.8506 -nan 0.1000 0.0307
## 3 0.8127 -nan 0.1000 0.0199
## 4 0.7874 -nan 0.1000 0.0263
## 5 0.7662 -nan 0.1000 0.0140
## 6 0.7467 -nan 0.1000 0.0131
## 7 0.7290 -nan 0.1000 0.0110
## 8 0.7097 -nan 0.1000 0.0112
## 9 0.6945 -nan 0.1000 0.0104
## 10 0.6756 -nan 0.1000 0.0095
## 20 0.5871 -nan 0.1000 -0.0002
## 40 0.5058 -nan 0.1000 -0.0034
## 60 0.4774 -nan 0.1000 -0.0021
## 80 0.4606 -nan 0.1000 -0.0027
## 100 0.4463 -nan 0.1000 -0.0011
## 120 0.4361 -nan 0.1000 -0.0005
## 140 0.4261 -nan 0.1000 -0.0014
## 150 0.4209 -nan 0.1000 -0.0012
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8710 -nan 0.1000 0.0651
## 2 0.8167 -nan 0.1000 0.0428
## 3 0.7784 -nan 0.1000 0.0398
## 4 0.7421 -nan 0.1000 0.0279
## 5 0.7040 -nan 0.1000 0.0299
## 6 0.6771 -nan 0.1000 0.0184
## 7 0.6547 -nan 0.1000 0.0198
## 8 0.6328 -nan 0.1000 0.0158
## 9 0.6121 -nan 0.1000 0.0134
## 10 0.5972 -nan 0.1000 0.0093
## 20 0.5111 -nan 0.1000 -0.0047
## 40 0.4404 -nan 0.1000 0.0015
## 60 0.3972 -nan 0.1000 -0.0039
## 80 0.3672 -nan 0.1000 0.0015
## 100 0.3393 -nan 0.1000 -0.0020
## 120 0.3239 -nan 0.1000 -0.0027
## 140 0.3100 -nan 0.1000 -0.0020
## 150 0.3006 -nan 0.1000 -0.0029
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8612 -nan 0.1000 0.0662
## 2 0.8066 -nan 0.1000 0.0344
## 3 0.7598 -nan 0.1000 0.0452
## 4 0.7214 -nan 0.1000 0.0260
## 5 0.6805 -nan 0.1000 0.0340
## 6 0.6475 -nan 0.1000 0.0270
## 7 0.6231 -nan 0.1000 0.0185
## 8 0.6044 -nan 0.1000 0.0176
## 9 0.5839 -nan 0.1000 0.0201
## 10 0.5652 -nan 0.1000 0.0105
## 20 0.4564 -nan 0.1000 0.0069
## 40 0.3769 -nan 0.1000 0.0002
## 60 0.3301 -nan 0.1000 -0.0026
## 80 0.2988 -nan 0.1000 -0.0049
## 100 0.2769 -nan 0.1000 -0.0023
## 120 0.2591 -nan 0.1000 -0.0016
## 140 0.2433 -nan 0.1000 -0.0011
## 150 0.2357 -nan 0.1000 -0.0030
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8537 -nan 0.1000 0.0361
## 2 0.8184 -nan 0.1000 0.0313
## 3 0.7865 -nan 0.1000 0.0256
## 4 0.7575 -nan 0.1000 0.0235
## 5 0.7388 -nan 0.1000 0.0176
## 6 0.7171 -nan 0.1000 0.0189
## 7 0.6971 -nan 0.1000 0.0160
## 8 0.6823 -nan 0.1000 0.0089
## 9 0.6688 -nan 0.1000 0.0088
## 10 0.6532 -nan 0.1000 0.0132
## 20 0.5653 -nan 0.1000 0.0041
## 40 0.4998 -nan 0.1000 0.0008
## 60 0.4733 -nan 0.1000 -0.0008
## 80 0.4549 -nan 0.1000 -0.0023
## 100 0.4431 -nan 0.1000 -0.0003
## 120 0.4290 -nan 0.1000 -0.0020
## 140 0.4175 -nan 0.1000 -0.0013
## 150 0.4131 -nan 0.1000 -0.0007
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8342 -nan 0.1000 0.0531
## 2 0.7908 -nan 0.1000 0.0451
## 3 0.7503 -nan 0.1000 0.0356
## 4 0.7150 -nan 0.1000 0.0360
## 5 0.6830 -nan 0.1000 0.0219
## 6 0.6552 -nan 0.1000 0.0194
## 7 0.6334 -nan 0.1000 0.0160
## 8 0.6136 -nan 0.1000 0.0090
## 9 0.5979 -nan 0.1000 0.0142
## 10 0.5794 -nan 0.1000 0.0130
## 20 0.4886 -nan 0.1000 -0.0006
## 40 0.4178 -nan 0.1000 -0.0003
## 60 0.3823 -nan 0.1000 0.0006
## 80 0.3575 -nan 0.1000 -0.0029
## 100 0.3366 -nan 0.1000 -0.0038
## 120 0.3179 -nan 0.1000 -0.0014
## 140 0.3033 -nan 0.1000 -0.0012
## 150 0.2981 -nan 0.1000 -0.0021
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8378 -nan 0.1000 0.0605
## 2 0.7833 -nan 0.1000 0.0458
## 3 0.7453 -nan 0.1000 0.0377
## 4 0.7048 -nan 0.1000 0.0381
## 5 0.6683 -nan 0.1000 0.0276
## 6 0.6430 -nan 0.1000 0.0200
## 7 0.6183 -nan 0.1000 0.0200
## 8 0.5946 -nan 0.1000 0.0158
## 9 0.5727 -nan 0.1000 0.0095
## 10 0.5586 -nan 0.1000 0.0097
## 20 0.4594 -nan 0.1000 0.0033
## 40 0.3770 -nan 0.1000 0.0004
## 60 0.3454 -nan 0.1000 -0.0011
## 80 0.3115 -nan 0.1000 -0.0006
## 100 0.2870 -nan 0.1000 -0.0056
## 120 0.2655 -nan 0.1000 -0.0026
## 140 0.2491 -nan 0.1000 -0.0020
## 150 0.2397 -nan 0.1000 -0.0024
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8996 -nan 0.1000 0.0367
## 2 0.8657 -nan 0.1000 0.0330
## 3 0.8323 -nan 0.1000 0.0302
## 4 0.8055 -nan 0.1000 0.0222
## 5 0.7864 -nan 0.1000 0.0190
## 6 0.7681 -nan 0.1000 0.0098
## 7 0.7442 -nan 0.1000 0.0207
## 8 0.7251 -nan 0.1000 0.0126
## 9 0.7082 -nan 0.1000 0.0071
## 10 0.6923 -nan 0.1000 0.0136
## 20 0.5987 -nan 0.1000 0.0081
## 40 0.5115 -nan 0.1000 -0.0008
## 60 0.4793 -nan 0.1000 0.0010
## 80 0.4585 -nan 0.1000 -0.0010
## 100 0.4474 -nan 0.1000 -0.0009
## 120 0.4352 -nan 0.1000 -0.0004
## 140 0.4202 -nan 0.1000 -0.0019
## 150 0.4122 -nan 0.1000 -0.0033
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8716 -nan 0.1000 0.0645
## 2 0.8188 -nan 0.1000 0.0463
## 3 0.7705 -nan 0.1000 0.0367
## 4 0.7329 -nan 0.1000 0.0294
## 5 0.7062 -nan 0.1000 0.0225
## 6 0.6829 -nan 0.1000 0.0192
## 7 0.6631 -nan 0.1000 0.0194
## 8 0.6416 -nan 0.1000 0.0164
## 9 0.6215 -nan 0.1000 0.0122
## 10 0.6068 -nan 0.1000 0.0123
## 20 0.5080 -nan 0.1000 -0.0021
## 40 0.4318 -nan 0.1000 -0.0018
## 60 0.3933 -nan 0.1000 -0.0038
## 80 0.3620 -nan 0.1000 -0.0026
## 100 0.3367 -nan 0.1000 -0.0007
## 120 0.3206 -nan 0.1000 -0.0035
## 140 0.3007 -nan 0.1000 -0.0017
## 150 0.2939 -nan 0.1000 -0.0021
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8666 -nan 0.1000 0.0578
## 2 0.8057 -nan 0.1000 0.0520
## 3 0.7571 -nan 0.1000 0.0363
## 4 0.7083 -nan 0.1000 0.0342
## 5 0.6709 -nan 0.1000 0.0202
## 6 0.6494 -nan 0.1000 0.0175
## 7 0.6227 -nan 0.1000 0.0202
## 8 0.5996 -nan 0.1000 0.0195
## 9 0.5827 -nan 0.1000 0.0123
## 10 0.5656 -nan 0.1000 0.0128
## 20 0.4649 -nan 0.1000 0.0044
## 40 0.3855 -nan 0.1000 -0.0036
## 60 0.3353 -nan 0.1000 -0.0019
## 80 0.2982 -nan 0.1000 -0.0013
## 100 0.2743 -nan 0.1000 -0.0044
## 120 0.2560 -nan 0.1000 -0.0034
## 140 0.2382 -nan 0.1000 -0.0012
## 150 0.2290 -nan 0.1000 -0.0024
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8915 -nan 0.1000 0.0402
## 2 0.8488 -nan 0.1000 0.0333
## 3 0.8117 -nan 0.1000 0.0330
## 4 0.7797 -nan 0.1000 0.0256
## 5 0.7559 -nan 0.1000 0.0205
## 6 0.7309 -nan 0.1000 0.0195
## 7 0.7115 -nan 0.1000 0.0132
## 8 0.6947 -nan 0.1000 0.0109
## 9 0.6810 -nan 0.1000 0.0126
## 10 0.6668 -nan 0.1000 0.0069
## 20 0.5598 -nan 0.1000 0.0043
## 40 0.4853 -nan 0.1000 0.0005
## 60 0.4610 -nan 0.1000 0.0000
## 80 0.4427 -nan 0.1000 -0.0002
## 100 0.4307 -nan 0.1000 -0.0022
## 120 0.4178 -nan 0.1000 -0.0014
## 140 0.4081 -nan 0.1000 0.0000
## 150 0.4026 -nan 0.1000 -0.0021
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8746 -nan 0.1000 0.0560
## 2 0.8114 -nan 0.1000 0.0438
## 3 0.7576 -nan 0.1000 0.0389
## 4 0.7166 -nan 0.1000 0.0393
## 5 0.6854 -nan 0.1000 0.0295
## 6 0.6513 -nan 0.1000 0.0197
## 7 0.6344 -nan 0.1000 0.0124
## 8 0.6147 -nan 0.1000 0.0143
## 9 0.5941 -nan 0.1000 0.0100
## 10 0.5786 -nan 0.1000 0.0129
## 20 0.4839 -nan 0.1000 0.0050
## 40 0.4085 -nan 0.1000 0.0042
## 60 0.3713 -nan 0.1000 -0.0010
## 80 0.3357 -nan 0.1000 -0.0025
## 100 0.3141 -nan 0.1000 -0.0005
## 120 0.2985 -nan 0.1000 -0.0016
## 140 0.2857 -nan 0.1000 -0.0014
## 150 0.2788 -nan 0.1000 -0.0023
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8690 -nan 0.1000 0.0714
## 2 0.8115 -nan 0.1000 0.0489
## 3 0.7546 -nan 0.1000 0.0472
## 4 0.7122 -nan 0.1000 0.0373
## 5 0.6663 -nan 0.1000 0.0356
## 6 0.6378 -nan 0.1000 0.0261
## 7 0.6070 -nan 0.1000 0.0265
## 8 0.5808 -nan 0.1000 0.0221
## 9 0.5551 -nan 0.1000 0.0151
## 10 0.5423 -nan 0.1000 0.0088
## 20 0.4427 -nan 0.1000 0.0030
## 40 0.3642 -nan 0.1000 0.0014
## 60 0.3225 -nan 0.1000 -0.0028
## 80 0.2880 -nan 0.1000 -0.0021
## 100 0.2661 -nan 0.1000 -0.0017
## 120 0.2457 -nan 0.1000 -0.0041
## 140 0.2294 -nan 0.1000 -0.0021
## 150 0.2241 -nan 0.1000 -0.0019
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8709 -nan 0.1000 0.0416
## 2 0.8328 -nan 0.1000 0.0349
## 3 0.8104 -nan 0.1000 0.0180
## 4 0.7848 -nan 0.1000 0.0239
## 5 0.7575 -nan 0.1000 0.0247
## 6 0.7362 -nan 0.1000 0.0188
## 7 0.7177 -nan 0.1000 0.0149
## 8 0.7021 -nan 0.1000 0.0134
## 9 0.6896 -nan 0.1000 0.0113
## 10 0.6730 -nan 0.1000 0.0069
## 20 0.5846 -nan 0.1000 0.0012
## 40 0.5109 -nan 0.1000 0.0020
## 60 0.4797 -nan 0.1000 0.0008
## 80 0.4657 -nan 0.1000 0.0004
## 100 0.4497 -nan 0.1000 0.0002
## 120 0.4346 -nan 0.1000 -0.0010
## 140 0.4243 -nan 0.1000 -0.0015
## 150 0.4189 -nan 0.1000 -0.0007
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8670 -nan 0.1000 0.0563
## 2 0.8199 -nan 0.1000 0.0422
## 3 0.7663 -nan 0.1000 0.0440
## 4 0.7263 -nan 0.1000 0.0319
## 5 0.6858 -nan 0.1000 0.0235
## 6 0.6575 -nan 0.1000 0.0229
## 7 0.6327 -nan 0.1000 0.0198
## 8 0.6122 -nan 0.1000 0.0183
## 9 0.5961 -nan 0.1000 0.0098
## 10 0.5824 -nan 0.1000 0.0067
## 20 0.4857 -nan 0.1000 -0.0004
## 40 0.4271 -nan 0.1000 -0.0021
## 60 0.3856 -nan 0.1000 0.0010
## 80 0.3536 -nan 0.1000 -0.0037
## 100 0.3300 -nan 0.1000 0.0004
## 120 0.3110 -nan 0.1000 -0.0033
## 140 0.2926 -nan 0.1000 -0.0024
## 150 0.2871 -nan 0.1000 -0.0042
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8646 -nan 0.1000 0.0573
## 2 0.8052 -nan 0.1000 0.0459
## 3 0.7565 -nan 0.1000 0.0442
## 4 0.7096 -nan 0.1000 0.0387
## 5 0.6769 -nan 0.1000 0.0272
## 6 0.6561 -nan 0.1000 0.0152
## 7 0.6302 -nan 0.1000 0.0196
## 8 0.6042 -nan 0.1000 0.0232
## 9 0.5831 -nan 0.1000 0.0158
## 10 0.5619 -nan 0.1000 0.0075
## 20 0.4426 -nan 0.1000 0.0043
## 40 0.3703 -nan 0.1000 -0.0032
## 60 0.3177 -nan 0.1000 -0.0027
## 80 0.2890 -nan 0.1000 -0.0003
## 100 0.2662 -nan 0.1000 -0.0012
## 120 0.2434 -nan 0.1000 -0.0011
## 140 0.2281 -nan 0.1000 -0.0022
## 150 0.2211 -nan 0.1000 -0.0024
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.9078 -nan 0.1000 0.0420
## 2 0.8673 -nan 0.1000 0.0404
## 3 0.8299 -nan 0.1000 0.0305
## 4 0.8002 -nan 0.1000 0.0283
## 5 0.7699 -nan 0.1000 0.0225
## 6 0.7475 -nan 0.1000 0.0177
## 7 0.7318 -nan 0.1000 0.0168
## 8 0.7115 -nan 0.1000 0.0138
## 9 0.6949 -nan 0.1000 0.0113
## 10 0.6792 -nan 0.1000 0.0107
## 20 0.5878 -nan 0.1000 0.0046
## 40 0.5153 -nan 0.1000 -0.0010
## 60 0.4914 -nan 0.1000 -0.0025
## 80 0.4761 -nan 0.1000 -0.0003
## 100 0.4581 -nan 0.1000 0.0000
## 120 0.4483 -nan 0.1000 -0.0025
## 140 0.4359 -nan 0.1000 -0.0020
## 150 0.4314 -nan 0.1000 -0.0017
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8863 -nan 0.1000 0.0671
## 2 0.8349 -nan 0.1000 0.0567
## 3 0.7876 -nan 0.1000 0.0379
## 4 0.7554 -nan 0.1000 0.0281
## 5 0.7168 -nan 0.1000 0.0343
## 6 0.6887 -nan 0.1000 0.0269
## 7 0.6613 -nan 0.1000 0.0209
## 8 0.6377 -nan 0.1000 0.0166
## 9 0.6180 -nan 0.1000 0.0155
## 10 0.6024 -nan 0.1000 0.0123
## 20 0.5099 -nan 0.1000 0.0057
## 40 0.4482 -nan 0.1000 -0.0029
## 60 0.4107 -nan 0.1000 -0.0014
## 80 0.3765 -nan 0.1000 -0.0048
## 100 0.3498 -nan 0.1000 -0.0008
## 120 0.3306 -nan 0.1000 -0.0013
## 140 0.3134 -nan 0.1000 -0.0005
## 150 0.3084 -nan 0.1000 -0.0013
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8805 -nan 0.1000 0.0632
## 2 0.8145 -nan 0.1000 0.0423
## 3 0.7762 -nan 0.1000 0.0309
## 4 0.7368 -nan 0.1000 0.0305
## 5 0.7023 -nan 0.1000 0.0331
## 6 0.6686 -nan 0.1000 0.0271
## 7 0.6405 -nan 0.1000 0.0205
## 8 0.6178 -nan 0.1000 0.0162
## 9 0.5953 -nan 0.1000 0.0177
## 10 0.5766 -nan 0.1000 0.0109
## 20 0.4744 -nan 0.1000 0.0029
## 40 0.3882 -nan 0.1000 0.0006
## 60 0.3446 -nan 0.1000 -0.0031
## 80 0.3153 -nan 0.1000 -0.0046
## 100 0.2880 -nan 0.1000 -0.0028
## 120 0.2697 -nan 0.1000 -0.0027
## 140 0.2508 -nan 0.1000 -0.0016
## 150 0.2426 -nan 0.1000 -0.0025
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8974 -nan 0.1000 0.0507
## 2 0.8515 -nan 0.1000 0.0359
## 3 0.8258 -nan 0.1000 0.0154
## 4 0.7887 -nan 0.1000 0.0317
## 5 0.7651 -nan 0.1000 0.0242
## 6 0.7415 -nan 0.1000 0.0189
## 7 0.7198 -nan 0.1000 0.0173
## 8 0.7044 -nan 0.1000 0.0112
## 9 0.6859 -nan 0.1000 0.0153
## 10 0.6697 -nan 0.1000 0.0074
## 20 0.5723 -nan 0.1000 0.0043
## 40 0.4896 -nan 0.1000 0.0000
## 60 0.4660 -nan 0.1000 -0.0026
## 80 0.4469 -nan 0.1000 -0.0023
## 100 0.4334 -nan 0.1000 -0.0024
## 120 0.4219 -nan 0.1000 -0.0030
## 140 0.4126 -nan 0.1000 -0.0003
## 150 0.4073 -nan 0.1000 -0.0012
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8765 -nan 0.1000 0.0588
## 2 0.8105 -nan 0.1000 0.0516
## 3 0.7676 -nan 0.1000 0.0479
## 4 0.7255 -nan 0.1000 0.0401
## 5 0.6859 -nan 0.1000 0.0299
## 6 0.6533 -nan 0.1000 0.0246
## 7 0.6245 -nan 0.1000 0.0232
## 8 0.6060 -nan 0.1000 0.0161
## 9 0.5859 -nan 0.1000 0.0146
## 10 0.5700 -nan 0.1000 0.0120
## 20 0.4765 -nan 0.1000 0.0035
## 40 0.4072 -nan 0.1000 -0.0009
## 60 0.3694 -nan 0.1000 -0.0021
## 80 0.3486 -nan 0.1000 -0.0024
## 100 0.3242 -nan 0.1000 -0.0012
## 120 0.3001 -nan 0.1000 -0.0014
## 140 0.2866 -nan 0.1000 -0.0022
## 150 0.2812 -nan 0.1000 -0.0011
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8715 -nan 0.1000 0.0728
## 2 0.8141 -nan 0.1000 0.0526
## 3 0.7598 -nan 0.1000 0.0415
## 4 0.7155 -nan 0.1000 0.0368
## 5 0.6837 -nan 0.1000 0.0299
## 6 0.6493 -nan 0.1000 0.0323
## 7 0.6189 -nan 0.1000 0.0233
## 8 0.5922 -nan 0.1000 0.0246
## 9 0.5682 -nan 0.1000 0.0118
## 10 0.5490 -nan 0.1000 0.0123
## 20 0.4274 -nan 0.1000 0.0001
## 40 0.3572 -nan 0.1000 0.0009
## 60 0.3157 -nan 0.1000 -0.0018
## 80 0.2878 -nan 0.1000 -0.0034
## 100 0.2671 -nan 0.1000 -0.0025
## 120 0.2490 -nan 0.1000 -0.0016
## 140 0.2329 -nan 0.1000 -0.0030
## 150 0.2256 -nan 0.1000 -0.0015
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8499 -nan 0.1000 0.0350
## 2 0.8106 -nan 0.1000 0.0367
## 3 0.7829 -nan 0.1000 0.0260
## 4 0.7599 -nan 0.1000 0.0143
## 5 0.7326 -nan 0.1000 0.0230
## 6 0.7087 -nan 0.1000 0.0133
## 7 0.6942 -nan 0.1000 0.0083
## 8 0.6798 -nan 0.1000 0.0066
## 9 0.6646 -nan 0.1000 0.0146
## 10 0.6499 -nan 0.1000 0.0113
## 20 0.5531 -nan 0.1000 0.0023
## 40 0.4817 -nan 0.1000 0.0023
## 60 0.4501 -nan 0.1000 -0.0014
## 80 0.4349 -nan 0.1000 -0.0022
## 100 0.4206 -nan 0.1000 -0.0026
## 120 0.4076 -nan 0.1000 -0.0005
## 140 0.3972 -nan 0.1000 -0.0009
## 150 0.3915 -nan 0.1000 -0.0012
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8399 -nan 0.1000 0.0531
## 2 0.7881 -nan 0.1000 0.0448
## 3 0.7448 -nan 0.1000 0.0325
## 4 0.7093 -nan 0.1000 0.0299
## 5 0.6749 -nan 0.1000 0.0278
## 6 0.6516 -nan 0.1000 0.0191
## 7 0.6281 -nan 0.1000 0.0178
## 8 0.6081 -nan 0.1000 0.0158
## 9 0.5880 -nan 0.1000 0.0095
## 10 0.5720 -nan 0.1000 0.0121
## 20 0.4720 -nan 0.1000 0.0041
## 40 0.4093 -nan 0.1000 -0.0008
## 60 0.3734 -nan 0.1000 -0.0007
## 80 0.3481 -nan 0.1000 -0.0035
## 100 0.3285 -nan 0.1000 -0.0004
## 120 0.3108 -nan 0.1000 -0.0010
## 140 0.2959 -nan 0.1000 -0.0017
## 150 0.2861 -nan 0.1000 -0.0026
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8325 -nan 0.1000 0.0484
## 2 0.7730 -nan 0.1000 0.0434
## 3 0.7267 -nan 0.1000 0.0383
## 4 0.6880 -nan 0.1000 0.0253
## 5 0.6552 -nan 0.1000 0.0202
## 6 0.6251 -nan 0.1000 0.0310
## 7 0.6008 -nan 0.1000 0.0203
## 8 0.5770 -nan 0.1000 0.0215
## 9 0.5579 -nan 0.1000 0.0162
## 10 0.5375 -nan 0.1000 0.0152
## 20 0.4406 -nan 0.1000 0.0003
## 40 0.3750 -nan 0.1000 0.0000
## 60 0.3284 -nan 0.1000 -0.0035
## 80 0.2930 -nan 0.1000 -0.0039
## 100 0.2744 -nan 0.1000 -0.0016
## 120 0.2527 -nan 0.1000 -0.0018
## 140 0.2361 -nan 0.1000 -0.0015
## 150 0.2289 -nan 0.1000 -0.0012
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.9077 -nan 0.1000 0.0368
## 2 0.8625 -nan 0.1000 0.0309
## 3 0.8260 -nan 0.1000 0.0295
## 4 0.7988 -nan 0.1000 0.0216
## 5 0.7716 -nan 0.1000 0.0186
## 6 0.7459 -nan 0.1000 0.0201
## 7 0.7296 -nan 0.1000 0.0133
## 8 0.7117 -nan 0.1000 0.0112
## 9 0.6928 -nan 0.1000 0.0123
## 10 0.6753 -nan 0.1000 0.0109
## 20 0.5753 -nan 0.1000 0.0035
## 40 0.4961 -nan 0.1000 0.0017
## 60 0.4712 -nan 0.1000 -0.0022
## 80 0.4510 -nan 0.1000 -0.0022
## 100 0.4387 -nan 0.1000 -0.0025
## 120 0.4275 -nan 0.1000 -0.0006
## 140 0.4170 -nan 0.1000 -0.0017
## 150 0.4127 -nan 0.1000 -0.0014
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8782 -nan 0.1000 0.0672
## 2 0.8245 -nan 0.1000 0.0522
## 3 0.7865 -nan 0.1000 0.0264
## 4 0.7477 -nan 0.1000 0.0389
## 5 0.7187 -nan 0.1000 0.0287
## 6 0.6823 -nan 0.1000 0.0254
## 7 0.6561 -nan 0.1000 0.0181
## 8 0.6363 -nan 0.1000 0.0085
## 9 0.6149 -nan 0.1000 0.0147
## 10 0.5966 -nan 0.1000 0.0123
## 20 0.4896 -nan 0.1000 0.0018
## 40 0.4222 -nan 0.1000 -0.0021
## 60 0.3823 -nan 0.1000 -0.0035
## 80 0.3477 -nan 0.1000 -0.0026
## 100 0.3257 -nan 0.1000 -0.0019
## 120 0.3116 -nan 0.1000 -0.0020
## 140 0.2974 -nan 0.1000 -0.0016
## 150 0.2876 -nan 0.1000 -0.0016
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8746 -nan 0.1000 0.0598
## 2 0.8145 -nan 0.1000 0.0564
## 3 0.7614 -nan 0.1000 0.0459
## 4 0.7237 -nan 0.1000 0.0328
## 5 0.6801 -nan 0.1000 0.0295
## 6 0.6441 -nan 0.1000 0.0269
## 7 0.6144 -nan 0.1000 0.0156
## 8 0.5892 -nan 0.1000 0.0178
## 9 0.5697 -nan 0.1000 0.0131
## 10 0.5502 -nan 0.1000 0.0150
## 20 0.4519 -nan 0.1000 -0.0023
## 40 0.3790 -nan 0.1000 0.0006
## 60 0.3410 -nan 0.1000 -0.0036
## 80 0.3152 -nan 0.1000 -0.0048
## 100 0.2852 -nan 0.1000 -0.0021
## 120 0.2616 -nan 0.1000 -0.0011
## 140 0.2408 -nan 0.1000 -0.0025
## 150 0.2323 -nan 0.1000 -0.0017
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.9151 -nan 0.1000 0.0300
## 2 0.8744 -nan 0.1000 0.0312
## 3 0.8427 -nan 0.1000 0.0270
## 4 0.8201 -nan 0.1000 0.0156
## 5 0.7985 -nan 0.1000 0.0178
## 6 0.7741 -nan 0.1000 0.0209
## 7 0.7505 -nan 0.1000 0.0179
## 8 0.7307 -nan 0.1000 0.0135
## 9 0.7092 -nan 0.1000 0.0099
## 10 0.6938 -nan 0.1000 0.0117
## 20 0.5938 -nan 0.1000 0.0014
## 40 0.5113 -nan 0.1000 0.0024
## 60 0.4818 -nan 0.1000 -0.0001
## 80 0.4643 -nan 0.1000 -0.0058
## 100 0.4536 -nan 0.1000 -0.0027
## 120 0.4411 -nan 0.1000 -0.0016
## 140 0.4307 -nan 0.1000 -0.0046
## 150 0.4272 -nan 0.1000 -0.0038
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8884 -nan 0.1000 0.0611
## 2 0.8343 -nan 0.1000 0.0435
## 3 0.7848 -nan 0.1000 0.0373
## 4 0.7494 -nan 0.1000 0.0305
## 5 0.7133 -nan 0.1000 0.0287
## 6 0.6819 -nan 0.1000 0.0215
## 7 0.6578 -nan 0.1000 0.0184
## 8 0.6334 -nan 0.1000 0.0185
## 9 0.6121 -nan 0.1000 0.0149
## 10 0.6002 -nan 0.1000 0.0092
## 20 0.5081 -nan 0.1000 -0.0001
## 40 0.4407 -nan 0.1000 -0.0033
## 60 0.4036 -nan 0.1000 -0.0024
## 80 0.3762 -nan 0.1000 -0.0036
## 100 0.3575 -nan 0.1000 -0.0010
## 120 0.3342 -nan 0.1000 -0.0002
## 140 0.3164 -nan 0.1000 -0.0026
## 150 0.3089 -nan 0.1000 -0.0022
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8877 -nan 0.1000 0.0613
## 2 0.8272 -nan 0.1000 0.0505
## 3 0.7838 -nan 0.1000 0.0308
## 4 0.7390 -nan 0.1000 0.0304
## 5 0.7043 -nan 0.1000 0.0282
## 6 0.6672 -nan 0.1000 0.0294
## 7 0.6483 -nan 0.1000 0.0094
## 8 0.6206 -nan 0.1000 0.0189
## 9 0.5954 -nan 0.1000 0.0198
## 10 0.5772 -nan 0.1000 0.0122
## 20 0.4639 -nan 0.1000 0.0021
## 40 0.3840 -nan 0.1000 -0.0017
## 60 0.3420 -nan 0.1000 -0.0009
## 80 0.3122 -nan 0.1000 -0.0022
## 100 0.2894 -nan 0.1000 -0.0038
## 120 0.2706 -nan 0.1000 -0.0028
## 140 0.2510 -nan 0.1000 -0.0022
## 150 0.2428 -nan 0.1000 -0.0023
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8609 -nan 0.1000 0.0635
## 2 0.8089 -nan 0.1000 0.0522
## 3 0.7602 -nan 0.1000 0.0477
## 4 0.7108 -nan 0.1000 0.0402
## 5 0.6797 -nan 0.1000 0.0263
## 6 0.6505 -nan 0.1000 0.0222
## 7 0.6263 -nan 0.1000 0.0126
## 8 0.6027 -nan 0.1000 0.0202
## 9 0.5827 -nan 0.1000 0.0145
## 10 0.5669 -nan 0.1000 0.0118
## 20 0.4688 -nan 0.1000 -0.0029
## 40 0.3785 -nan 0.1000 -0.0006
## 60 0.3374 -nan 0.1000 -0.0014
## 80 0.3080 -nan 0.1000 -0.0011
## 100 0.2841 -nan 0.1000 -0.0014
## 120 0.2632 -nan 0.1000 -0.0037
## 140 0.2456 -nan 0.1000 -0.0006
## 150 0.2388 -nan 0.1000 -0.0038
# Collect the cross-validation resampling results from the four trained
# models (gbm, rpart, ctree, rf) so their MAE / RMSE / R^2 distributions
# can be compared on identical folds (caret::resamples).
results <- resamples(models)
# Print min / quartile / mean / max of each metric per model.
summary(results)
##
## Call:
## summary.resamples(object = results)
##
## Models: gbm, rpart, ctree, rf
## Number of resamples: 10
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## gbm 0.4687233 0.5078348 0.5495942 0.5420463 0.5789102 0.5998050 0
## rpart 0.5831178 0.5988980 0.6531474 0.6539180 0.6776290 0.7869798 0
## ctree 0.4746433 0.5928757 0.6495099 0.6276345 0.6739210 0.7495884 0
## rf 0.4452686 0.5316551 0.5939309 0.5686621 0.6148059 0.6362468 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## gbm 0.5714741 0.6663491 0.7055309 0.6948586 0.7242498 0.8067917 0
## rpart 0.6743902 0.7975012 0.8092424 0.8227469 0.8883526 0.9640653 0
## ctree 0.6677394 0.7206163 0.7917507 0.7993867 0.8868519 0.9663479 0
## rf 0.5705885 0.6745430 0.7168520 0.7200560 0.7759650 0.8655634 0
##
## Rsquared
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## gbm 0.24803460 0.3853941 0.5090232 0.4841910 0.5770362 0.6643076 0
## rpart 0.05703410 0.1533675 0.3202689 0.2988295 0.4404230 0.5106335 0
## ctree 0.06241944 0.2085845 0.3396858 0.3398067 0.4151717 0.6629144 0
## rf 0.16349349 0.3393713 0.4838505 0.4436417 0.5313505 0.6552906 0
# Fix the RNG seed so the stacking resamples are reproducible.
set.seed(101)
# Reuse the same resampling scheme as the base learners for the meta-model.
stackControl <- train_control
# Ensemble the predictions of `models` (gbm, rpart, ctree, rf) into a new
# combined prediction, using a GLM as the meta-learner (caretEnsemble::caretStack).
stack.glm <- caretStack(models, method="glm", trControl=stackControl)
# Show the ensemble's cross-validated RMSE / Rsquared / MAE.
print(stack.glm)
## A glm ensemble of 4 base models: gbm, rpart, ctree, rf
##
## Ensemble results:
## Generalized Linear Model
##
## 348 samples
## 4 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times)
## Summary of sample sizes: 313, 314, 312, 312, 314, 312, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.694036 0.493936 0.5488019
# Generate predictions from each fitted model over the full feature set
# `data5b`, attach them (plus state/county/year identifiers) to build the
# per-county evaluation frames, and allocate the per-county metric table.

# make predictions for acreage using random forest
predictions <- predict(model, data5b)
# make predictions for seasonal loss using random forest
predictions_loss <- predict(model_loss, data5b)
# make predictions for seasonal loss using ensemble
predictions_ensemble <- predict(stack.glm, data5b)
#cforest_Prediction <- predict(cforest_model_loss, newdata=test_loss, OOB=TRUE, type = "response")
predictions_loss_all <- predict(model_loss_all, data5b)
#--end ensemble

# append predictions to the model inputs
mydat <- cbind(data5b, predictions)
mydat_loss <- cbind(data5b, predictions_loss)
#cforest_mydat_loss <- cbind(test_loss,cforest_Prediction)
mydat_loss_all <- cbind(data5b, predictions_loss_all)
mydat_loss_ensemble <- cbind(data5b, predictions_ensemble)

# re-attach the state/county/year identifier columns for reporting
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)

# Exclude the two counties outside the core analysis.
finaldat <- subset(finaldat, county != "Kootenai" & county != "Clearwater")
countyvector <- unique(finaldat$county)

# Per-county metric table. Sized from `countyvector` rather than a
# hard-coded 24: after dropping Kootenai and Clearwater the county count
# may differ from 24, and a length mismatch here would make the
# `$county <-` assignment fail (or silently recycle).
county_rmse_r2 <- data.frame(matrix(ncol = 2, nrow = length(countyvector)))
county_rmse_r2$county <- countyvector
colnames(county_rmse_r2) <- c("rmse", "r2", "county")
# Per-county model evaluation and plotting.
#
# For each county in `countyvector`, compute R^2 and RMSE of the acreage
# predictions against historical pct_acreage, store them in
# `county_rmse_r2`, and plot historical (red) vs. predicted (blue)
# percent drought-claim acreage for 2001-2015.

# Metric helpers — loop-invariant, so defined once rather than being
# re-created inside every loop iteration as before.
RMSE <- function(m, o) {
  sqrt(mean((m - o)^2))
}
MAE <- function(m, o) {
  mean(abs(m - o))
}

# The statements below do not depend on the loop variable; the original
# loop re-executed them identically on every iteration, so they are
# hoisted here. NOTE(review): re-creating `finaldat` from `mydat`
# discards the earlier Kootenai/Clearwater filter (the original loop also
# did this from its first iteration onward); the per-county subset below
# is unaffected because it selects rows by exact county name.
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)
NRMSE1 <- nrmse_func(finaldat$loss, finaldat$predictions)
NRMSE2 <- nrmse_func(finaldat_loss$loss, finaldat_loss$predictions_loss)
fit1 <- lm(form3, data = data5b)

# Template of all study years, used to pad counties with missing years.
yearlist <- data.frame(year = 2001:2015)

ii <- 0
for (i in countyvector) {
  ii <- ii + 1
  countyplot <- subset(finaldat, county == i)

  # R^2 = 1 - SS_residual / SS_total, relative to the county mean.
  avr_y_actual <- mean(countyplot$pct_acreage)
  ss_total <- sum((countyplot$pct_acreage - avr_y_actual)^2)
  ss_residuals <- sum((countyplot$pct_acreage - countyplot$predictions)^2)
  r2 <- round(1 - ss_residuals / ss_total, 2)
  rmse <- round(RMSE(countyplot$predictions, countyplot$pct_acreage), 2)
  county_rmse_r2[ii, 1] <- rmse
  county_rmse_r2[ii, 2] <- r2

  # Pad to the full 2001-2015 span; years without claims plot as zero.
  countyplot <- merge(countyplot, yearlist, by = "year", all = TRUE)
  countyplot$predictions[is.na(countyplot$predictions)] <- 0
  countyplot$pct_acreage[is.na(countyplot$pct_acreage)] <- 0

  # Historical (red) vs. predicted (blue) time series for this county.
  par(mar = c(5, 5.1, 2, 3), family = "serif", mgp = c(3.8, 1, 0), las = 0)
  plot(c(countyplot$pct_acreage) ~ c(countyplot$year), cex.lab = 1.5, cex.axis = 1.5, col = "red",
    las = 2, xaxt = "n", xlab = "Year", ylab = "Percent Acreage Drought Claims", main = paste(i, " County, ", countyplot$state[1], ": R2 = ", r2, " RMSE = ", rmse, sep=""))
  lines(countyplot$pct_acreage ~ countyplot$year, col = "red")
  points(countyplot$predictions ~ countyplot$year, col = "blue")
  lines(countyplot$predictions ~ countyplot$year, col = "blue")
  axis(1, countyplot$year, 2001:2015, col.axis = "black", las = 2, cex.axis = 1.5)
  legend("topright", legend = c("historical", "predicted"),
         col = c("red", "blue"), lty = 1, cex = 1.5)
}